Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
 "Notable changes:

   - Support for 4PB user address space on 64-bit, opt-in via mmap() (see the
     sketch after the quoted message below).

   - Removal of POWER4 support, which was accidentally broken in 2016
     and no one noticed, and blocked use of some modern instructions.

   - Workarounds so that the hypervisor can enable Transactional Memory
     on Power9.

   - A series to disable the DAWR (Data Address Watchpoint Register) on
     Power9.

   - More information displayed in the meltdown/spectre_v1/v2 sysfs
     files.

   - A vpermxor (Power8 Altivec) implementation for the raid6 Q
     Syndrome.

   - A big series to make the allocation of our pacas (per cpu area),
     kernel page tables, and per-cpu stacks NUMA aware when using the
     Radix MMU on Power9.

  And as usual many fixes, reworks and cleanups.

  Thanks to: Aaro Koskinen, Alexandre Belloni, Alexey Kardashevskiy,
  Alistair Popple, Andy Shevchenko, Aneesh Kumar K.V, Anshuman Khandual,
  Balbir Singh, Benjamin Herrenschmidt, Christophe Leroy, Christophe
  Lombard, Cyril Bur, Daniel Axtens, Dave Young, Finn Thain, Frederic
  Barrat, Gustavo Romero, Horia Geantă, Jonathan Neuschäfer, Kees Cook,
  Larry Finger, Laurent Dufour, Laurent Vivier, Logan Gunthorpe,
  Madhavan Srinivasan, Mark Greer, Mark Hairgrove, Markus Elfring,
  Mathieu Malaterre, Matt Brown, Matt Evans, Mauricio Faria de Oliveira,
  Michael Neuling, Naveen N. Rao, Nicholas Piggin, Paul Mackerras,
  Philippe Bergheaud, Ram Pai, Rob Herring, Sam Bobroff, Segher
  Boessenkool, Simon Guo, Simon Horman, Stewart Smith, Sukadev
  Bhattiprolu, Suraj Jitindar Singh, Thiago Jung Bauermann, Vaibhav
  Jain, Vaidyanathan Srinivasan, Vasant Hegde, Wei Yongjun"
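
For readers not familiar with the 4PB opt-in: by default a 64-bit process keeps
getting mmap() addresses below the old 128TB boundary, and passing a hint
address above that boundary is what opts the process into the larger address
space. Below is a minimal userspace sketch of the idea; the 1UL << 48 hint, the
sizes and the variable names are illustrative, not taken from the series.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 1UL << 20;

	/* Default behaviour: the kernel keeps this mapping below 128TB. */
	void *low = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/* A hint address above 128TB opts this process into the extended
	 * (up to 4PB) address space; 1UL << 48 is 256TB. */
	void *high = mmap((void *)(1UL << 48), len, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	printf("low = %p, high = %p\n", low, high);
	return 0;
}

On kernels or CPUs without the feature the hint is not honoured and the second
call should simply come back below the boundary, so the opt-in stays backwards
compatible.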

* tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (207 commits)
  powerpc/64s/idle: Fix restore of AMOR on POWER9 after deep sleep
  powerpc/64s: Fix POWER9 DD2.2 and above in cputable features
  powerpc/64s: Fix pkey support in dt_cpu_ftrs, add CPU_FTR_PKEY bit
  powerpc/64s: Fix dt_cpu_ftrs to have restore_cpu clear unwanted LPCR bits
  Revert "powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead"
  powerpc: iomap.c: introduce io{read|write}64_{lo_hi|hi_lo}
  powerpc: io.h: move iomap.h include so that it can use readq/writeq defs
  cxl: Fix possible deadlock when processing page faults from cxllib
  powerpc/hw_breakpoint: Only disable hw breakpoint if cpu supports it
  powerpc/mm/radix: Update command line parsing for disable_radix
  powerpc/mm/radix: Parse disable_radix commandline correctly.
  powerpc/mm/hugetlb: initialize the pagetable cache correctly for hugetlb
  powerpc/mm/radix: Update pte fragment count from 16 to 256 on radix
  powerpc/mm/keys: Update documentation and remove unnecessary check
  powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead
  powerpc/64s/idle: Consolidate power9_offline_stop()/power9_idle_stop()
  powerpc/powernv: Always stop secondaries before reboot/shutdown
  powerpc: hard disable irqs in smp_send_stop loop
  powerpc: use NMI IPI for smp_send_stop
  powerpc/powernv: Fix SMT4 forcing idle code
  ...
Linus Torvalds 2018-04-07 12:08:19 -07:00
commit 49a695ba72
275 changed files with 4647 additions and 2432 deletions

lib/raid6/.gitignore

@@ -4,3 +4,4 @@ int*.c
tables.c
neon?.c
s390vx?.c
vpermxor*.c

lib/raid6/Makefile

@@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
int8.o int16.o int32.o
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -90,6 +91,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_vpermxor1.o += $(altivec_flags)
targets += vpermxor1.c
$(obj)/vpermxor1.c: UNROLL := 1
$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_vpermxor2.o += $(altivec_flags)
targets += vpermxor2.c
$(obj)/vpermxor2.c: UNROLL := 2
$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_vpermxor4.o += $(altivec_flags)
targets += vpermxor4.c
$(obj)/vpermxor4.c: UNROLL := 4
$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_vpermxor8.o += $(altivec_flags)
targets += vpermxor8.c
$(obj)/vpermxor8.c: UNROLL := 8
$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
	$(call if_changed,unroll)

CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c: UNROLL := 1
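
A note on the generated files above: vpermxor1.c through vpermxor8.c do not
exist in the tree; they are produced from vpermxor.uc by unroll.awk with the
UNROLL factor set per rule. As I read unroll.awk, $# in the template is
replaced by the factor N and any line containing $$ is emitted once for each
index 0..N-1, roughly like this:

/* Template line in vpermxor.uc:
 *
 *	unative_t wp$$, wq$$, wd$$;
 *
 * Generated vpermxor2.c (UNROLL := 2), approximately:
 *
 *	unative_t wp0, wq0, wd0;
 *	unative_t wp1, wq1, wd1;
 *
 * and raid6_vpermxor$#_gen_syndrome() becomes raid6_vpermxor2_gen_syndrome().
 */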

lib/raid6/algos.c

@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
	&raid6_altivec2,
	&raid6_altivec4,
	&raid6_altivec8,
	&raid6_vpermxor1,
	&raid6_vpermxor2,
	&raid6_vpermxor4,
	&raid6_vpermxor8,
#endif
#if defined(CONFIG_S390)
	&raid6_s390vx8,
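
The entries added above are only candidates: at initialisation the raid6
library walks raid6_algos[], skips entries whose ->valid() callback returns 0
(here raid6_have_altivec_vpermxor(), defined in vpermxor.uc below), benchmarks
the rest and picks the fastest gen_syndrome(). For orientation, each entry has
roughly the following shape; field names are from include/linux/raid/pq.h as I
recall them, so treat this as a sketch rather than the authoritative
definition:

struct raid6_calls {
	void (*gen_syndrome)(int disks, size_t bytes, void **ptrs);
	void (*xor_syndrome)(int disks, int start, int stop,
			     size_t bytes, void **ptrs);
	int  (*valid)(void);	/* returns 1 if this routine set is usable */
	const char *name;	/* name printed by the boot-time benchmark */
	int prefer;		/* non-zero marks a preferred implementation */
};

This also explains the positional initializer at the end of vpermxor.uc:
gen_syndrome, a NULL xor_syndrome, the valid callback, the name, and prefer = 0.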

lib/raid6/altivec.uc

@@ -24,10 +24,13 @@
#include <linux/raid/pq.h>
#ifdef CONFIG_ALTIVEC
#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
#endif /* __KERNEL__ */
/*
* This is the C data type to use. We use a vector of

lib/raid6/test/Makefile

@@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes)
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
else
HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
gcc -c -x c - >&/dev/null && \
rm ./-.o && echo yes)
gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
ifeq ($(HAS_ALTIVEC),yes)
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
CFLAGS += -I../../../arch/powerpc/include
CFLAGS += -DCONFIG_ALTIVEC
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
endif
endif
@@ -95,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
altivec8.c: altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@

vpermxor1.c: vpermxor.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@

vpermxor2.c: vpermxor.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@

vpermxor4.c: vpermxor.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@

vpermxor8.c: vpermxor.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@

int1.c: int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=1 < int.uc > $@
@@ -117,7 +131,7 @@ tables.c: mktables
	./mktables > tables.c

clean:
	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test

spotless: clean
	rm -f *~

lib/raid6/vpermxor.uc (new file, 105 lines)

@@ -0,0 +1,105 @@
/*
 * Copyright 2017, Matt Brown, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * vpermxor$#.c
 *
 * Based on H. Peter Anvin's paper - The mathematics of RAID-6
 *
 * $#-way unrolled portable integer math RAID-6 instruction set
 * This file is postprocessed using unroll.awk
 *
 * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q
 * syndrome calculations.
 * This can be run on systems which have both Altivec and vpermxor instruction.
 *
 * This instruction was introduced in POWER8 - ISA v2.07.
 */

#include <linux/raid/pq.h>
#ifdef CONFIG_ALTIVEC

#include <altivec.h>
#ifdef __KERNEL__
#include <asm/cputable.h>
#include <asm/ppc-opcode.h>
#include <asm/switch_to.h>
#endif

typedef vector unsigned char unative_t;
#define NSIZE sizeof(unative_t)

static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
					    0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
					    0x06, 0x04, 0x02, 0x00};
static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
					     0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
					     0x60, 0x40, 0x20, 0x00};

static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
							void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;
	unative_t wp$$, wq$$, wd$$;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	for (d = 0; d < bytes; d += NSIZE*$#) {
		wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];

		for (z = z0-1; z>=0; z--) {
			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			/* P syndrome */
			wp$$ = vec_xor(wp$$, wd$$);

			/* Q syndrome */
			asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
			wq$$ = vec_xor(wq$$, wd$$);
		}
		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
	}
}

static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	preempt_disable();
	enable_kernel_altivec();

	raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);

	disable_kernel_altivec();
	preempt_enable();
}

int raid6_have_altivec_vpermxor(void);
#if $# == 1
int raid6_have_altivec_vpermxor(void)
{
	/* Check if arch has both altivec and the vpermxor instructions */
# ifdef __KERNEL__
	return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
		cpu_has_feature(CPU_FTR_ARCH_207S));
# else
	return 1;
#endif
}
#endif

const struct raid6_calls raid6_vpermxor$# = {
	raid6_vpermxor$#_gen_syndrome,
	NULL,
	raid6_have_altivec_vpermxor,
	"vpermxor$#",
	0
};
#endif
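
For reference, the inline asm in the inner loop above implements the
multiply-by-2 step of the Q syndrome (Q = D0 + 2·D1 + 2²·D2 + ... over GF(2^8)
with the RAID-6 polynomial 0x11d): the high and low nibbles of each byte of wq
index the gf_high and gf_low tables and the two lookups are XORed, so one
vpermxor performs 16 byte-wise multiplications at once. A scalar sketch of the
same per-byte operation follows; the function name and test values are mine,
not from the series.

#include <stdint.h>
#include <stdio.h>

/* Multiply one Q-syndrome byte by 2 in GF(2^8) mod 0x11d -- the per-byte
 * equivalent of what VPERMXOR does for a whole 16-byte vector. */
static uint8_t gf2_mul2(uint8_t q)
{
	return (uint8_t)((q << 1) ^ ((q & 0x80) ? 0x1d : 0x00));
}

int main(void)
{
	/* Sanity check: 0x80 * 2 wraps to 0x1d, 0x01 * 2 is plain 0x02. */
	printf("%02x %02x\n", gf2_mul2(0x80), gf2_mul2(0x01));
	return 0;
}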