author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 10:15:40 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 10:15:40 -0700
commit | bac65d9d87b383471d8d29128319508d71b74180 (patch)
tree | c087cca1f1db1045cce08a3bff7c775c66e437bf /arch/powerpc/kernel
parent | f92e3da18b7d5941468040af962c201235148301 (diff)
parent | 265601f034df3566f22da11240977aab8860f6a7 (diff)
Merge tag 'powerpc-4.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"Nothing really major this release, despite quite a lot of activity.
Just lots of things all over the place.
Some things of note include:
- Access via perf to a new type of PMU (IMC) on Power9, which can
count both core events and nest unit events (memory controller
etc). A hypothetical userspace counting sketch follows the quoted
message below.
- Optimisations to the radix MMU TLB flushing, mostly to avoid
unnecessary Page Walk Cache (PWC) flushes when the structure of the
tree is not changing.
- Reworks/cleanups of do_page_fault() to modernise it and bring it
closer to other architectures where possible.
- Rework of our page table walking so that THP updates only need to
send IPIs to CPUs where the affected mm has run, rather than all
CPUs; the IPI idea is sketched after the closing paragraph below.
- The size of our vmalloc area is increased to 56T on 64-bit hash MMU
systems. This avoids problems with the percpu allocator on systems
with very sparse NUMA layouts.
- STRICT_KERNEL_RWX support on PPC32.
- A new sched domain topology for Power9, to capture the fact that
pairs of cores may share an L2 cache.
- Power9 support for VAS, which is a new mechanism for accessing
coprocessors, and initial support for using it with the NX
compression accelerator.
- Major work on the instruction emulation support, adding support for
many new instructions, and reworking it so it can be used to
implement the emulation needed to fix up alignment faults; the new
decode-then-emulate flow is sketched just after this list.
- Support for guests under PowerVM to use the Power9 XIVE interrupt
controller.
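The reworked alignment handler (visible in the align.c hunk further down)
drops the old DSISR lookup table in favour of decoding the faulting
instruction and then emulating it generically. A condensed sketch of that
flow, using the analyse_instr()/emulate_loadstore() interface from
asm/sstep.h; error handling is abbreviated and the function name is
illustrative:

#include <asm/ptrace.h>
#include <asm/sstep.h>

/* Decode the faulting instruction into a generic op, refuse what
 * cannot be emulated, then let the common code perform the access. */
static int fix_alignment_sketch(struct pt_regs *regs, unsigned int instr)
{
        struct instruction_op op;
        int r, type;

        r = analyse_instr(&op, regs, instr);
        if (r < 0)
                return -EINVAL;

        type = op.type & INSTR_TYPE_MASK;
        if (type == LARX || type == STCX)
                return -EIO;    /* atomics are never emulated */

        r = emulate_loadstore(regs, &op);
        return r ? r : 1;       /* 1 == fault handled and fixed up */
}

Sharing one decoder between alignment fixups and the other emulation
users is what makes the large deletion in align.c below possible.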
And probably that many things again that are almost as interesting,
but I had to keep the list short. Plus the usual fixes and cleanups as
always.
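For the THP rework noted above, the heart of the change is serialising
against lockless page-table walkers by IPI-ing only the CPUs in the mm's
cpumask. A minimal sketch of the idea (the series adds a helper along
these lines; treat the exact name as an assumption):

#include <linux/smp.h>
#include <linux/mm_types.h>

static void do_nothing(void *unused)
{
}

/*
 * A synchronous no-op IPI to every CPU this mm has run on acts as a
 * barrier: once it completes, none of those CPUs can still be inside
 * a lockless walk of the old page tables.
 */
static void serialize_against_pte_lookup(struct mm_struct *mm)
{
        smp_mb();
        smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1);
}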
Thanks to: Alexey Kardashevskiy, Alistair Popple, Andreas Schwab,
Aneesh Kumar K.V, Anju T Sudhakar, Arvind Yadav, Balbir Singh,
Benjamin Herrenschmidt, Bhumika Goyal, Breno Leitao, Bryant G. Ly,
Christophe Leroy, Cédric Le Goater, Dan Carpenter, Dou Liyang,
Frederic Barrat, Gautham R. Shenoy, Geliang Tang, Geoff Levand, Hannes
Reinecke, Haren Myneni, Ivan Mikhaylov, John Allen, Julia Lawall,
LABBE Corentin, Laurentiu Tudor, Madhavan Srinivasan, Markus Elfring,
Masahiro Yamada, Matt Brown, Michael Neuling, Murilo Opsfelder Araujo,
Nathan Fontenot, Naveen N. Rao, Nicholas Piggin, Oliver O'Halloran,
Paul Mackerras, Rashmica Gupta, Rob Herring, Rui Teng, Sam Bobroff,
Santosh Sivaraj, Scott Wood, Shilpasri G Bhat, Sukadev Bhattiprolu,
Suraj Jitindar Singh, Tobin C. Harding, Victor Aoqui"
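To illustrate the IMC bullet above from userspace: IMC units appear as
dynamic perf PMUs, so a counter is opened by reading the PMU's type from
sysfs and handing it to perf_event_open(2). A hypothetical sketch; the
PMU name ("core_imc") and the zero config are placeholders for values
published under /sys/bus/event_source/devices/:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr = { 0 };
        unsigned long long count;
        FILE *f = fopen("/sys/bus/event_source/devices/core_imc/type", "r");
        int type, fd;

        if (!f || fscanf(f, "%d", &type) != 1)
                return 1;
        fclose(f);

        attr.type = type;       /* dynamic PMU type read from sysfs */
        attr.size = sizeof(attr);
        attr.config = 0;        /* placeholder; real values live in events/ */

        fd = perf_event_open(&attr, -1, 0, -1, 0);      /* CPU 0, any task */
        if (fd < 0)
                return 1;
        sleep(1);
        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("count: %llu\n", count);
        close(fd);
        return 0;
}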
* tag 'powerpc-4.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (321 commits)
powerpc/xive: Fix section __init warning
powerpc: Fix kernel crash in emulation of vector loads and stores
powerpc/xive: improve debugging macros
powerpc/xive: add XIVE Exploitation Mode to CAS
powerpc/xive: introduce H_INT_ESB hcall
powerpc/xive: add the HW IRQ number under xive_irq_data
powerpc/xive: introduce xive_esb_write()
powerpc/xive: rename xive_poke_esb() in xive_esb_read()
powerpc/xive: guest exploitation of the XIVE interrupt controller
powerpc/xive: introduce a common routine xive_queue_page_alloc()
powerpc/sstep: Avoid used uninitialized error
axonram: Return directly after a failed kzalloc() in axon_ram_probe()
axonram: Improve a size determination in axon_ram_probe()
axonram: Delete an error message for a failed memory allocation in axon_ram_probe()
powerpc/powernv/npu: Move tlb flush before launching ATSD
powerpc/macintosh: constify wf_sensor_ops structures
powerpc/iommu: Use permission-specific DEVICE_ATTR variants
powerpc/eeh: Delete an error out of memory message at init time
powerpc/mm: Use seq_putc() in two functions
macintosh: Convert to using %pOF instead of full_name
...
Diffstat (limited to 'arch/powerpc/kernel')
52 files changed, 949 insertions, 1487 deletions
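A recurring pattern in the hunks below (btext.c, cacheinfo.c) and in the
shortlog's %pOF conversions: device-tree nodes are now printed with the
%pOF printk format, which renders the node path from the struct
device_node itself instead of the cached full_name string. In miniature:

#include <linux/of.h>
#include <linux/printk.h>

static void report_node(struct device_node *np)
{
        /* Before: relied on the full_name string kept in each node. */
        pr_info("trying %s ...\n", np->full_name);
        /* After: %pOF prints the path straight from the node. */
        pr_info("trying %pOF ...\n", np);
}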
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 4aa7c147e447..91960f83039c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -38,7 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o obj-$(CONFIG_VDSO32) += vdso32/ -obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o +obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o @@ -83,7 +83,7 @@ extra-y := head_$(BITS).o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o -extra-$(CONFIG_8xx) := head_8xx.o +extra-$(CONFIG_PPC_8xx) := head_8xx.o extra-y += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index ec7a8b099dd9..26b9994d27ee 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -27,6 +27,7 @@ #include <asm/switch_to.h> #include <asm/disassemble.h> #include <asm/cpu_has_feature.h> +#include <asm/sstep.h> struct aligninfo { unsigned char len; @@ -40,364 +41,9 @@ struct aligninfo { #define LD 0 /* load */ #define ST 1 /* store */ #define SE 2 /* sign-extend value, or FP ld/st as word */ -#define F 4 /* to/from fp regs */ -#define U 8 /* update index register */ -#define M 0x10 /* multiple load/store */ #define SW 0x20 /* byte swap */ -#define S 0x40 /* single-precision fp or... */ -#define SX 0x40 /* ... byte count in XER */ -#define HARD 0x80 /* string, stwcx. */ #define E4 0x40 /* SPE endianness is word */ #define E8 0x80 /* SPE endianness is double word */ -#define SPLT 0x80 /* VSX SPLAT load */ - -/* DSISR bits reported for a DCBZ instruction: */ -#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */ - -/* - * The PowerPC stores certain bits of the instruction that caused the - * alignment exception in the DSISR register. This array maps those - * bits to information about the operand length and what the - * instruction would do. 
- */ -static struct aligninfo aligninfo[128] = { - { 4, LD }, /* 00 0 0000: lwz / lwarx */ - INVALID, /* 00 0 0001 */ - { 4, ST }, /* 00 0 0010: stw */ - INVALID, /* 00 0 0011 */ - { 2, LD }, /* 00 0 0100: lhz */ - { 2, LD+SE }, /* 00 0 0101: lha */ - { 2, ST }, /* 00 0 0110: sth */ - { 4, LD+M }, /* 00 0 0111: lmw */ - { 4, LD+F+S }, /* 00 0 1000: lfs */ - { 8, LD+F }, /* 00 0 1001: lfd */ - { 4, ST+F+S }, /* 00 0 1010: stfs */ - { 8, ST+F }, /* 00 0 1011: stfd */ - { 16, LD }, /* 00 0 1100: lq */ - { 8, LD }, /* 00 0 1101: ld/ldu/lwa */ - INVALID, /* 00 0 1110 */ - { 8, ST }, /* 00 0 1111: std/stdu */ - { 4, LD+U }, /* 00 1 0000: lwzu */ - INVALID, /* 00 1 0001 */ - { 4, ST+U }, /* 00 1 0010: stwu */ - INVALID, /* 00 1 0011 */ - { 2, LD+U }, /* 00 1 0100: lhzu */ - { 2, LD+SE+U }, /* 00 1 0101: lhau */ - { 2, ST+U }, /* 00 1 0110: sthu */ - { 4, ST+M }, /* 00 1 0111: stmw */ - { 4, LD+F+S+U }, /* 00 1 1000: lfsu */ - { 8, LD+F+U }, /* 00 1 1001: lfdu */ - { 4, ST+F+S+U }, /* 00 1 1010: stfsu */ - { 8, ST+F+U }, /* 00 1 1011: stfdu */ - { 16, LD+F }, /* 00 1 1100: lfdp */ - INVALID, /* 00 1 1101 */ - { 16, ST+F }, /* 00 1 1110: stfdp */ - INVALID, /* 00 1 1111 */ - { 8, LD }, /* 01 0 0000: ldx */ - INVALID, /* 01 0 0001 */ - { 8, ST }, /* 01 0 0010: stdx */ - INVALID, /* 01 0 0011 */ - INVALID, /* 01 0 0100 */ - { 4, LD+SE }, /* 01 0 0101: lwax */ - INVALID, /* 01 0 0110 */ - INVALID, /* 01 0 0111 */ - { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */ - { 4, LD+M+HARD }, /* 01 0 1001: lswi */ - { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */ - { 4, ST+M+HARD }, /* 01 0 1011: stswi */ - INVALID, /* 01 0 1100 */ - { 8, LD+U }, /* 01 0 1101: ldu */ - INVALID, /* 01 0 1110 */ - { 8, ST+U }, /* 01 0 1111: stdu */ - { 8, LD+U }, /* 01 1 0000: ldux */ - INVALID, /* 01 1 0001 */ - { 8, ST+U }, /* 01 1 0010: stdux */ - INVALID, /* 01 1 0011 */ - INVALID, /* 01 1 0100 */ - { 4, LD+SE+U }, /* 01 1 0101: lwaux */ - INVALID, /* 01 1 0110 */ - INVALID, /* 01 1 0111 */ - INVALID, /* 01 1 1000 */ - INVALID, /* 01 1 1001 */ - INVALID, /* 01 1 1010 */ - INVALID, /* 01 1 1011 */ - INVALID, /* 01 1 1100 */ - INVALID, /* 01 1 1101 */ - INVALID, /* 01 1 1110 */ - INVALID, /* 01 1 1111 */ - INVALID, /* 10 0 0000 */ - INVALID, /* 10 0 0001 */ - INVALID, /* 10 0 0010: stwcx. 
*/ - INVALID, /* 10 0 0011 */ - INVALID, /* 10 0 0100 */ - INVALID, /* 10 0 0101 */ - INVALID, /* 10 0 0110 */ - INVALID, /* 10 0 0111 */ - { 4, LD+SW }, /* 10 0 1000: lwbrx */ - INVALID, /* 10 0 1001 */ - { 4, ST+SW }, /* 10 0 1010: stwbrx */ - INVALID, /* 10 0 1011 */ - { 2, LD+SW }, /* 10 0 1100: lhbrx */ - { 4, LD+SE }, /* 10 0 1101 lwa */ - { 2, ST+SW }, /* 10 0 1110: sthbrx */ - { 16, ST }, /* 10 0 1111: stq */ - INVALID, /* 10 1 0000 */ - INVALID, /* 10 1 0001 */ - INVALID, /* 10 1 0010 */ - INVALID, /* 10 1 0011 */ - INVALID, /* 10 1 0100 */ - INVALID, /* 10 1 0101 */ - INVALID, /* 10 1 0110 */ - INVALID, /* 10 1 0111 */ - INVALID, /* 10 1 1000 */ - INVALID, /* 10 1 1001 */ - INVALID, /* 10 1 1010 */ - INVALID, /* 10 1 1011 */ - INVALID, /* 10 1 1100 */ - INVALID, /* 10 1 1101 */ - INVALID, /* 10 1 1110 */ - { 0, ST+HARD }, /* 10 1 1111: dcbz */ - { 4, LD }, /* 11 0 0000: lwzx */ - INVALID, /* 11 0 0001 */ - { 4, ST }, /* 11 0 0010: stwx */ - INVALID, /* 11 0 0011 */ - { 2, LD }, /* 11 0 0100: lhzx */ - { 2, LD+SE }, /* 11 0 0101: lhax */ - { 2, ST }, /* 11 0 0110: sthx */ - INVALID, /* 11 0 0111 */ - { 4, LD+F+S }, /* 11 0 1000: lfsx */ - { 8, LD+F }, /* 11 0 1001: lfdx */ - { 4, ST+F+S }, /* 11 0 1010: stfsx */ - { 8, ST+F }, /* 11 0 1011: stfdx */ - { 16, LD+F }, /* 11 0 1100: lfdpx */ - { 4, LD+F+SE }, /* 11 0 1101: lfiwax */ - { 16, ST+F }, /* 11 0 1110: stfdpx */ - { 4, ST+F }, /* 11 0 1111: stfiwx */ - { 4, LD+U }, /* 11 1 0000: lwzux */ - INVALID, /* 11 1 0001 */ - { 4, ST+U }, /* 11 1 0010: stwux */ - INVALID, /* 11 1 0011 */ - { 2, LD+U }, /* 11 1 0100: lhzux */ - { 2, LD+SE+U }, /* 11 1 0101: lhaux */ - { 2, ST+U }, /* 11 1 0110: sthux */ - INVALID, /* 11 1 0111 */ - { 4, LD+F+S+U }, /* 11 1 1000: lfsux */ - { 8, LD+F+U }, /* 11 1 1001: lfdux */ - { 4, ST+F+S+U }, /* 11 1 1010: stfsux */ - { 8, ST+F+U }, /* 11 1 1011: stfdux */ - INVALID, /* 11 1 1100 */ - { 4, LD+F }, /* 11 1 1101: lfiwzx */ - INVALID, /* 11 1 1110 */ - INVALID, /* 11 1 1111 */ -}; - -/* - * The dcbz (data cache block zero) instruction - * gives an alignment fault if used on non-cacheable - * memory. We handle the fault mainly for the - * case when we are running with the cache disabled - * for debugging. - */ -static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) -{ - long __user *p; - int i, size; - -#ifdef __powerpc64__ - size = ppc64_caches.l1d.block_size; -#else - size = L1_CACHE_BYTES; -#endif - p = (long __user *) (regs->dar & -size); - if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size)) - return -EFAULT; - for (i = 0; i < size / sizeof(long); ++i) - if (__put_user_inatomic(0, p+i)) - return -EFAULT; - return 1; -} - -/* - * Emulate load & store multiple instructions - * On 64-bit machines, these instructions only affect/use the - * bottom 4 bytes of each register, and the loads clear the - * top 4 bytes of the affected register. 
- */ -#ifdef __BIG_ENDIAN__ -#ifdef CONFIG_PPC64 -#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4) -#else -#define REG_BYTE(rp, i) *((u8 *)(rp) + (i)) -#endif -#else -#define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3)))) -#endif - -#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz)) - -static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr, - unsigned int reg, unsigned int nb, - unsigned int flags, unsigned int instr, - unsigned long swiz) -{ - unsigned long *rptr; - unsigned int nb0, i, bswiz; - unsigned long p; - - /* - * We do not try to emulate 8 bytes multiple as they aren't really - * available in our operating environments and we don't try to - * emulate multiples operations in kernel land as they should never - * be used/generated there at least not on unaligned boundaries - */ - if (unlikely((nb > 4) || !user_mode(regs))) - return 0; - - /* lmw, stmw, lswi/x, stswi/x */ - nb0 = 0; - if (flags & HARD) { - if (flags & SX) { - nb = regs->xer & 127; - if (nb == 0) - return 1; - } else { - unsigned long pc = regs->nip ^ (swiz & 4); - - if (__get_user_inatomic(instr, - (unsigned int __user *)pc)) - return -EFAULT; - if (swiz == 0 && (flags & SW)) - instr = cpu_to_le32(instr); - nb = (instr >> 11) & 0x1f; - if (nb == 0) - nb = 32; - } - if (nb + reg * 4 > 128) { - nb0 = nb + reg * 4 - 128; - nb = 128 - reg * 4; - } -#ifdef __LITTLE_ENDIAN__ - /* - * String instructions are endian neutral but the code - * below is not. Force byte swapping on so that the - * effects of swizzling are undone in the load/store - * loops below. - */ - flags ^= SW; -#endif - } else { - /* lwm, stmw */ - nb = (32 - reg) * 4; - } - - if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0)) - return -EFAULT; /* bad address */ - - rptr = ®s->gpr[reg]; - p = (unsigned long) addr; - bswiz = (flags & SW)? 3: 0; - - if (!(flags & ST)) { - /* - * This zeroes the top 4 bytes of the affected registers - * in 64-bit mode, and also zeroes out any remaining - * bytes of the last register for lsw*. - */ - memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long)); - if (nb0 > 0) - memset(®s->gpr[0], 0, - ((nb0 + 3) / 4) * sizeof(unsigned long)); - - for (i = 0; i < nb; ++i, ++p) - if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz), - SWIZ_PTR(p))) - return -EFAULT; - if (nb0 > 0) { - rptr = ®s->gpr[0]; - addr += nb; - for (i = 0; i < nb0; ++i, ++p) - if (__get_user_inatomic(REG_BYTE(rptr, - i ^ bswiz), - SWIZ_PTR(p))) - return -EFAULT; - } - - } else { - for (i = 0; i < nb; ++i, ++p) - if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz), - SWIZ_PTR(p))) - return -EFAULT; - if (nb0 > 0) { - rptr = ®s->gpr[0]; - addr += nb; - for (i = 0; i < nb0; ++i, ++p) - if (__put_user_inatomic(REG_BYTE(rptr, - i ^ bswiz), - SWIZ_PTR(p))) - return -EFAULT; - } - } - return 1; -} - -/* - * Emulate floating-point pair loads and stores. - * Only POWER6 has these instructions, and it does true little-endian, - * so we don't need the address swizzling. 
- */ -static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, - unsigned int flags) -{ - char *ptr0 = (char *) ¤t->thread.TS_FPR(reg); - char *ptr1 = (char *) ¤t->thread.TS_FPR(reg+1); - int i, ret, sw = 0; - - if (reg & 1) - return 0; /* invalid form: FRS/FRT must be even */ - if (flags & SW) - sw = 7; - ret = 0; - for (i = 0; i < 8; ++i) { - if (!(flags & ST)) { - ret |= __get_user(ptr0[i^sw], addr + i); - ret |= __get_user(ptr1[i^sw], addr + i + 8); - } else { - ret |= __put_user(ptr0[i^sw], addr + i); - ret |= __put_user(ptr1[i^sw], addr + i + 8); - } - } - if (ret) - return -EFAULT; - return 1; /* exception handled and fixed up */ -} - -#ifdef CONFIG_PPC64 -static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr, - unsigned int reg, unsigned int flags) -{ - char *ptr0 = (char *)®s->gpr[reg]; - char *ptr1 = (char *)®s->gpr[reg+1]; - int i, ret, sw = 0; - - if (reg & 1) - return 0; /* invalid form: GPR must be even */ - if (flags & SW) - sw = 7; - ret = 0; - for (i = 0; i < 8; ++i) { - if (!(flags & ST)) { - ret |= __get_user(ptr0[i^sw], addr + i); - ret |= __get_user(ptr1[i^sw], addr + i + 8); - } else { - ret |= __put_user(ptr0[i^sw], addr + i); - ret |= __put_user(ptr1[i^sw], addr + i + 8); - } - } - if (ret) - return -EFAULT; - return 1; /* exception handled and fixed up */ -} -#endif /* CONFIG_PPC64 */ #ifdef CONFIG_SPE @@ -636,133 +282,21 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, } #endif /* CONFIG_SPE */ -#ifdef CONFIG_VSX -/* - * Emulate VSX instructions... - */ -static int emulate_vsx(unsigned char __user *addr, unsigned int reg, - unsigned int areg, struct pt_regs *regs, - unsigned int flags, unsigned int length, - unsigned int elsize) -{ - char *ptr; - unsigned long *lptr; - int ret = 0; - int sw = 0; - int i, j; - - /* userland only */ - if (unlikely(!user_mode(regs))) - return 0; - - flush_vsx_to_thread(current); - - if (reg < 32) - ptr = (char *) ¤t->thread.fp_state.fpr[reg][0]; - else - ptr = (char *) ¤t->thread.vr_state.vr[reg - 32]; - - lptr = (unsigned long *) ptr; - -#ifdef __LITTLE_ENDIAN__ - if (flags & SW) { - elsize = length; - sw = length-1; - } else { - /* - * The elements are BE ordered, even in LE mode, so process - * them in reverse order. - */ - addr += length - elsize; - - /* 8 byte memory accesses go in the top 8 bytes of the VR */ - if (length == 8) - ptr += 8; - } -#else - if (flags & SW) - sw = elsize-1; -#endif - - for (j = 0; j < length; j += elsize) { - for (i = 0; i < elsize; ++i) { - if (flags & ST) - ret |= __put_user(ptr[i^sw], addr + i); - else - ret |= __get_user(ptr[i^sw], addr + i); - } - ptr += elsize; -#ifdef __LITTLE_ENDIAN__ - addr -= elsize; -#else - addr += elsize; -#endif - } - -#ifdef __BIG_ENDIAN__ -#define VSX_HI 0 -#define VSX_LO 1 -#else -#define VSX_HI 1 -#define VSX_LO 0 -#endif - - if (!ret) { - if (flags & U) - regs->gpr[areg] = regs->dar; - - /* Splat load copies the same data to top and bottom 8 bytes */ - if (flags & SPLT) - lptr[VSX_LO] = lptr[VSX_HI]; - /* For 8 byte loads, zero the low 8 bytes */ - else if (!(flags & ST) && (8 == length)) - lptr[VSX_LO] = 0; - } else - return -EFAULT; - - return 1; -} -#endif - /* * Called on alignment exception. Attempts to fixup * * Return 1 on success * Return 0 if unable to handle the interrupt * Return -EFAULT if data address is bad + * Other negative return values indicate that the instruction can't + * be emulated, and the process should be given a SIGBUS. 
*/ int fix_alignment(struct pt_regs *regs) { - unsigned int instr, nb, flags, instruction = 0; - unsigned int reg, areg; - unsigned int dsisr; - unsigned char __user *addr; - unsigned long p, swiz; - int ret, i; - union data { - u64 ll; - double dd; - unsigned char v[8]; - struct { -#ifdef __LITTLE_ENDIAN__ - int low32; - unsigned hi32; -#else - unsigned hi32; - int low32; -#endif - } x32; - struct { -#ifdef __LITTLE_ENDIAN__ - short low16; - unsigned char hi48[6]; -#else - unsigned char hi48[6]; - short low16; -#endif - } x16; - } data; + unsigned int instr; + struct instruction_op op; + int r, type; /* * We require a complete register set, if not, then our assembly @@ -770,121 +304,23 @@ int fix_alignment(struct pt_regs *regs) */ CHECK_FULL_REGS(regs); - dsisr = regs->dsisr; - - /* Some processors don't provide us with a DSISR we can use here, - * let's make one up from the instruction - */ - if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) { - unsigned long pc = regs->nip; - - if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE)) - pc ^= 4; - if (unlikely(__get_user_inatomic(instr, - (unsigned int __user *)pc))) - return -EFAULT; - if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE)) - instr = cpu_to_le32(instr); - dsisr = make_dsisr(instr); - instruction = instr; + if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip))) + return -EFAULT; + if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { + /* We don't handle PPC little-endian any more... */ + if (cpu_has_feature(CPU_FTR_PPC_LE)) + return -EIO; + instr = swab32(instr); } - /* extract the operation and registers from the dsisr */ - reg = (dsisr >> 5) & 0x1f; /* source/dest register */ - areg = dsisr & 0x1f; /* register to update */ - #ifdef CONFIG_SPE if ((instr >> 26) == 0x4) { + int reg = (instr >> 21) & 0x1f; PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } #endif - instr = (dsisr >> 10) & 0x7f; - instr |= (dsisr >> 13) & 0x60; - - /* Lookup the operation in our table */ - nb = aligninfo[instr].len; - flags = aligninfo[instr].flags; - - /* - * Handle some cases which give overlaps in the DSISR values. - */ - if (IS_XFORM(instruction)) { - switch (get_xop(instruction)) { - case 532: /* ldbrx */ - nb = 8; - flags = LD+SW; - break; - case 660: /* stdbrx */ - nb = 8; - flags = ST+SW; - break; - case 20: /* lwarx */ - case 84: /* ldarx */ - case 116: /* lharx */ - case 276: /* lqarx */ - return 0; /* not emulated ever */ - } - } - - /* Byteswap little endian loads and stores */ - swiz = 0; - if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { - flags ^= SW; -#ifdef __BIG_ENDIAN__ - /* - * So-called "PowerPC little endian" mode works by - * swizzling addresses rather than by actually doing - * any byte-swapping. To emulate this, we XOR each - * byte address with 7. We also byte-swap, because - * the processor's address swizzling depends on the - * operand size (it xors the address with 7 for bytes, - * 6 for halfwords, 4 for words, 0 for doublewords) but - * we will xor with 7 and load/store each byte separately. 
- */ - if (cpu_has_feature(CPU_FTR_PPC_LE)) - swiz = 7; -#endif - } - - /* DAR has the operand effective address */ - addr = (unsigned char __user *)regs->dar; - -#ifdef CONFIG_VSX - if ((instruction & 0xfc00003e) == 0x7c000018) { - unsigned int elsize; - - /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */ - reg |= (instruction & 0x1) << 5; - /* Simple inline decoder instead of a table */ - /* VSX has only 8 and 16 byte memory accesses */ - nb = 8; - if (instruction & 0x200) - nb = 16; - - /* Vector stores in little-endian mode swap individual - elements, so process them separately */ - elsize = 4; - if (instruction & 0x80) - elsize = 8; - - flags = 0; - if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) - flags |= SW; - if (instruction & 0x100) - flags |= ST; - if (instruction & 0x040) - flags |= U; - /* splat load needs a special decoder */ - if ((instruction & 0x400) == 0){ - flags |= SPLT; - nb = 8; - } - PPC_WARN_ALIGNMENT(vsx, regs); - return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize); - } -#endif /* * ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment @@ -896,173 +332,27 @@ int fix_alignment(struct pt_regs *regs) * when pasting to a co-processor. Furthermore, paste_last is the * synchronisation point for preceding copy/paste sequences. */ - if ((instruction & 0xfc0006fe) == PPC_INST_COPY) + if ((instr & 0xfc0006fe) == PPC_INST_COPY) return -EIO; - /* A size of 0 indicates an instruction we don't support, with - * the exception of DCBZ which is handled as a special case here - */ - if (instr == DCBZ) { - PPC_WARN_ALIGNMENT(dcbz, regs); - return emulate_dcbz(regs, addr); - } - if (unlikely(nb == 0)) - return 0; - - /* Load/Store Multiple instructions are handled in their own - * function - */ - if (flags & M) { - PPC_WARN_ALIGNMENT(multiple, regs); - return emulate_multiple(regs, addr, reg, nb, - flags, instr, swiz); - } - - /* Verify the address of the operand */ - if (unlikely(user_mode(regs) && - !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ), - addr, nb))) - return -EFAULT; - - /* Force the fprs into the save area so we can reference them */ - if (flags & F) { - /* userland only */ - if (unlikely(!user_mode(regs))) - return 0; - flush_fp_to_thread(current); - } + r = analyse_instr(&op, regs, instr); + if (r < 0) + return -EINVAL; - if (nb == 16) { - if (flags & F) { - /* Special case for 16-byte FP loads and stores */ - PPC_WARN_ALIGNMENT(fp_pair, regs); - return emulate_fp_pair(addr, reg, flags); - } else { -#ifdef CONFIG_PPC64 - /* Special case for 16-byte loads and stores */ - PPC_WARN_ALIGNMENT(lq_stq, regs); - return emulate_lq_stq(regs, addr, reg, flags); -#else - return 0; -#endif - } - } - - PPC_WARN_ALIGNMENT(unaligned, regs); - - /* If we are loading, get the data from user space, else - * get it from register values - */ - if (!(flags & ST)) { - unsigned int start = 0; - - switch (nb) { - case 4: - start = offsetof(union data, x32.low32); - break; - case 2: - start = offsetof(union data, x16.low16); - break; - } - - data.ll = 0; - ret = 0; - p = (unsigned long)addr; - - for (i = 0; i < nb; i++) - ret |= __get_user_inatomic(data.v[start + i], - SWIZ_PTR(p++)); - - if (unlikely(ret)) - return -EFAULT; - - } else if (flags & F) { - data.ll = current->thread.TS_FPR(reg); - if (flags & S) { - /* Single-precision FP store requires conversion... 
*/ -#ifdef CONFIG_PPC_FPU - preempt_disable(); - enable_kernel_fp(); - cvt_df(&data.dd, (float *)&data.x32.low32); - disable_kernel_fp(); - preempt_enable(); -#else - return 0; -#endif - } - } else - data.ll = regs->gpr[reg]; - - if (flags & SW) { - switch (nb) { - case 8: - data.ll = swab64(data.ll); - break; - case 4: - data.x32.low32 = swab32(data.x32.low32); - break; - case 2: - data.x16.low16 = swab16(data.x16.low16); - break; - } - } - - /* Perform other misc operations like sign extension - * or floating point single precision conversion - */ - switch (flags & ~(U|SW)) { - case LD+SE: /* sign extending integer loads */ - case LD+F+SE: /* sign extend for lfiwax */ - if ( nb == 2 ) - data.ll = data.x16.low16; - else /* nb must be 4 */ - data.ll = data.x32.low32; - break; - - /* Single-precision FP load requires conversion... */ - case LD+F+S: -#ifdef CONFIG_PPC_FPU - preempt_disable(); - enable_kernel_fp(); - cvt_fd((float *)&data.x32.low32, &data.dd); - disable_kernel_fp(); - preempt_enable(); -#else - return 0; -#endif - break; + type = op.type & INSTR_TYPE_MASK; + if (!OP_IS_LOAD_STORE(type)) { + if (type != CACHEOP + DCBZ) + return -EINVAL; + PPC_WARN_ALIGNMENT(dcbz, regs); + r = emulate_dcbz(op.ea, regs); + } else { + if (type == LARX || type == STCX) + return -EIO; + PPC_WARN_ALIGNMENT(unaligned, regs); + r = emulate_loadstore(regs, &op); } - /* Store result to memory or update registers */ - if (flags & ST) { - unsigned int start = 0; - - switch (nb) { - case 4: - start = offsetof(union data, x32.low32); - break; - case 2: - start = offsetof(union data, x16.low16); - break; - } - - ret = 0; - p = (unsigned long)addr; - - for (i = 0; i < nb; i++) - ret |= __put_user_inatomic(data.v[start + i], - SWIZ_PTR(p++)); - - if (unlikely(ret)) - return -EFAULT; - } else if (flags & F) - current->thread.TS_FPR(reg) = data.ll; - else - regs->gpr[reg] = data.ll; - - /* Update RA as needed */ - if (flags & U) - regs->gpr[areg] = regs->dar; - - return 1; + if (!r) + return 1; + return r; } diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6e95c2c19a7e..8cfb20e38cfe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -746,6 +746,14 @@ int main(void) OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); +#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) + STOP_SPR(STOP_PID, pid); + STOP_SPR(STOP_LDBAR, ldbar); + STOP_SPR(STOP_FSCR, fscr); + STOP_SPR(STOP_HFSCR, hfscr); + STOP_SPR(STOP_MMCR1, mmcr1); + STOP_SPR(STOP_MMCR2, mmcr2); + STOP_SPR(STOP_MMCRA, mmcra); #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 8275858a434d..3f46ca1c59f9 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -253,7 +253,7 @@ int __init btext_find_display(int allow_nonstdout) for_each_node_by_type(np, "display") { if (of_get_property(np, "linux,opened", NULL)) { - printk("trying %s ...\n", np->full_name); + printk("trying %pOF ...\n", np); rc = btext_initialize(np); printk("result: %d\n", rc); } diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index c641983bbdd6..a8f20e5928e1 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -167,10 +167,10 @@ static void release_cache_debugcheck(struct cache *cache) list_for_each_entry(iter, 
&cache_list, list) WARN_ONCE(iter->next_local == cache, - "cache for %s(%s) refers to cache for %s(%s)\n", - iter->ofnode->full_name, + "cache for %pOF(%s) refers to cache for %pOF(%s)\n", + iter->ofnode, cache_type_string(iter), - cache->ofnode->full_name, + cache->ofnode, cache_type_string(cache)); } @@ -179,8 +179,8 @@ static void release_cache(struct cache *cache) if (!cache) return; - pr_debug("freeing L%d %s cache for %s\n", cache->level, - cache_type_string(cache), cache->ofnode->full_name); + pr_debug("freeing L%d %s cache for %pOF\n", cache->level, + cache_type_string(cache), cache->ofnode); release_cache_debugcheck(cache); list_del(&cache->list); @@ -194,8 +194,8 @@ static void cache_cpu_set(struct cache *cache, int cpu) while (next) { WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map), - "CPU %i already accounted in %s(%s)\n", - cpu, next->ofnode->full_name, + "CPU %i already accounted in %pOF(%s)\n", + cpu, next->ofnode, cache_type_string(next)); cpumask_set_cpu(cpu, &next->shared_cpu_map); next = next->next_local; @@ -355,7 +355,7 @@ static int cache_is_unified_d(const struct device_node *np) */ static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level) { - pr_debug("creating L%d ucache for %s\n", level, node->full_name); + pr_debug("creating L%d ucache for %pOF\n", level, node); return new_cache(cache_is_unified_d(node), level, node); } @@ -365,8 +365,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node, { struct cache *dcache, *icache; - pr_debug("creating L%d dcache and icache for %s\n", level, - node->full_name); + pr_debug("creating L%d dcache and icache for %pOF\n", level, + node); dcache = new_cache(CACHE_TYPE_DATA, level, node); icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node); @@ -679,7 +679,6 @@ static struct kobj_type cache_index_type = { static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) { - const char *cache_name; const char *cache_type; struct cache *cache; char *buf; @@ -690,7 +689,6 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) return; cache = dir->cache; - cache_name = cache->ofnode->full_name; cache_type = cache_type_string(cache); /* We don't want to create an attribute that can't provide a @@ -707,14 +705,14 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) rc = attr->show(&dir->kobj, attr, buf); if (rc <= 0) { pr_debug("not creating %s attribute for " - "%s(%s) (rc = %zd)\n", - attr->attr.name, cache_name, + "%pOF(%s) (rc = %zd)\n", + attr->attr.name, cache->ofnode, cache_type, rc); continue; } if (sysfs_create_file(&dir->kobj, &attr->attr)) - pr_debug("could not create %s attribute for %s(%s)\n", - attr->attr.name, cache_name, cache_type); + pr_debug("could not create %s attribute for %pOF(%s)\n", + attr->attr.name, cache->ofnode, cache_type); } kfree(buf); @@ -831,8 +829,8 @@ static void cache_cpu_clear(struct cache *cache, int cpu) struct cache *next = cache->next_local; WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map), - "CPU %i not accounted in %s(%s)\n", - cpu, cache->ofnode->full_name, + "CPU %i not accounted in %pOF(%s)\n", + cpu, cache->ofnode, cache_type_string(cache)); cpumask_clear_cpu(cpu, &cache->shared_cpu_map); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 6f849832a669..760872916013 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1259,10 +1259,10 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = 
"ppc603", }, #endif /* CONFIG_PPC_BOOK3S_32 */ -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, - .pvr_value = 0x00500000, + .pvr_value = PVR_8xx, .cpu_name = "8xx", /* CPU_FTR_MAYBE_CAN_DOZE is possible, * if the 8xx code is there.... */ @@ -1274,7 +1274,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_8xx, .platform = "ppc823", }, -#endif /* CONFIG_8xx */ +#endif /* CONFIG_PPC_8xx */ #ifdef CONFIG_40x { /* 403GC */ .pvr_mask = 0xffffff00, @@ -1936,6 +1936,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, +#ifdef CONFIG_PPC_47x { /* 476 DD2 core */ .pvr_mask = 0xffffffff, .pvr_value = 0x11a52080, @@ -1992,6 +1993,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, +#endif /* CONFIG_PPC_47x */ { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 63992b2d8e15..9e816787c0d4 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -44,6 +44,7 @@ #include <asm/machdep.h> #include <asm/ppc-pci.h> #include <asm/rtas.h> +#include <asm/pte-walk.h> /** Overview: @@ -169,10 +170,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) char buffer[128]; n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", - edev->phb->global_number, pdn->busno, + pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", - edev->phb->global_number, pdn->busno, + pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); @@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) * worried about _PAGE_SPLITTING/collapse. Also we will not hit * page table free, because of init_mm. */ - ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, - NULL, &hugepage_shift); + ptep = find_init_mm_pte(token, &hugepage_shift); if (!ptep) return token; WARN_ON(hugepage_shift); @@ -435,7 +435,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) int ret; int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; - struct pci_dn *pdn; + struct device_node *dn; struct pci_dev *dev; struct eeh_pe *pe, *parent_pe, *phb_pe; int rc = 0; @@ -493,9 +493,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev) if (pe->state & EEH_PE_ISOLATED) { pe->check_count++; if (pe->check_count % EEH_MAX_FAILS == 0) { - pdn = eeh_dev_to_pdn(edev); - if (pdn->node) - location = of_get_property(pdn->node, "ibm,loc-code", NULL); + dn = pci_device_to_OF_node(dev); + if (dn) + location = of_get_property(dn, "ibm,loc-code", + NULL); printk(KERN_ERR "EEH: %d reads ignored for recovering device at " "location=%s driver=%s pci addr=%s\n", pe->check_count, @@ -1064,7 +1065,7 @@ core_initcall_sync(eeh_init); */ void eeh_add_device_early(struct pci_dn *pdn) { - struct pci_controller *phb; + struct pci_controller *phb = pdn ? 
pdn->phb : NULL; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); if (!edev) @@ -1074,7 +1075,6 @@ void eeh_add_device_early(struct pci_dn *pdn) return; /* USB Bus children of PCI devices will not have BUID's */ - phb = edev->phb; if (NULL == phb || (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) return; diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index d6b2ca70d14d..ad04ecd63c20 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -50,21 +50,16 @@ */ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) { - struct pci_controller *phb = pdn->phb; struct eeh_dev *edev; /* Allocate EEH device */ edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) { - pr_warn("%s: out of memory\n", - __func__); + if (!edev) return NULL; - } /* Associate EEH device with OF node */ pdn->edev = edev; edev->pdn = pdn; - edev->phb = phb; INIT_LIST_HEAD(&edev->list); INIT_LIST_HEAD(&edev->rmv_list); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index c405c79e50cd..8b840191df59 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -428,7 +428,7 @@ static void *eeh_add_virt_device(void *data, void *userdata) if (!(edev->physfn)) { pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", - __func__, edev->phb->global_number, pdn->busno, + __func__, pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); return NULL; } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index cc4b206f77e4..2e8d1b2b5af4 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -230,10 +230,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, * Bus/Device/Function number. The extra data referred by flag * indicates which type of address should be used. */ +struct eeh_pe_get_flag { + int pe_no; + int config_addr; +}; + static void *__eeh_pe_get(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - struct eeh_dev *edev = (struct eeh_dev *)flag; + struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag; /* Unexpected PHB PE */ if (pe->type & EEH_PE_PHB) @@ -244,17 +249,17 @@ static void *__eeh_pe_get(void *data, void *flag) * have non-zero PE address */ if (eeh_has_flag(EEH_VALID_PE_ZERO)) { - if (edev->pe_config_addr == pe->addr) + if (tmp->pe_no == pe->addr) return pe; } else { - if (edev->pe_config_addr && - (edev->pe_config_addr == pe->addr)) + if (tmp->pe_no && + (tmp->pe_no == pe->addr)) return pe; } /* Try BDF address */ - if (edev->config_addr && - (edev->config_addr == pe->config_addr)) + if (tmp->config_addr && + (tmp->config_addr == pe->config_addr)) return pe; return NULL; @@ -262,7 +267,9 @@ static void *__eeh_pe_get(void *data, void *flag) /** * eeh_pe_get - Search PE based on the given address - * @edev: EEH device + * @phb: PCI controller + * @pe_no: PE number + * @config_addr: Config address * * Search the corresponding PE based on the specified address which * is included in the eeh device. The function is used to check if @@ -271,12 +278,14 @@ static void *__eeh_pe_get(void *data, void *flag) * which is composed of PCI bus/device/function number, or unified * PE address. 
*/ -struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) +struct eeh_pe *eeh_pe_get(struct pci_controller *phb, + int pe_no, int config_addr) { - struct eeh_pe *root = eeh_phb_pe_get(edev->phb); + struct eeh_pe *root = eeh_phb_pe_get(phb); + struct eeh_pe_get_flag tmp = { pe_no, config_addr }; struct eeh_pe *pe; - pe = eeh_pe_traverse(root, __eeh_pe_get, edev); + pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp); return pe; } @@ -330,11 +339,13 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) int eeh_add_to_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + int config_addr = (pdn->busno << 8) | (pdn->devfn); /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n", - __func__, edev->config_addr, edev->phb->global_number); + __func__, config_addr, pdn->phb->global_number); return -EINVAL; } @@ -344,7 +355,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * PE should be composed of PCI bus and its subordinate * components. */ - pe = eeh_pe_get(edev); + pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr); if (pe && !(pe->type & EEH_PE_INVALID)) { /* Mark the PE as type of PCI bus */ pe->type = EEH_PE_BUS; @@ -353,11 +364,11 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Put the edev to PE */ list_add_tail(&edev->list, &pe->edevs); pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n", - edev->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF), - pe->addr); + pdn->phb->global_number, + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn), + pe->addr); return 0; } else if (pe && (pe->type & EEH_PE_INVALID)) { list_add_tail(&edev->list, &pe->edevs); @@ -376,25 +387,25 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device " "PE#%x, Parent PE#%x\n", - edev->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF), - pe->addr, pe->parent->addr); + pdn->phb->global_number, + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn), + pe->addr, pe->parent->addr); return 0; } /* Create a new EEH PE */ if (edev->physfn) - pe = eeh_pe_alloc(edev->phb, EEH_PE_VF); + pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF); else - pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } pe->addr = edev->pe_config_addr; - pe->config_addr = edev->config_addr; + pe->config_addr = config_addr; /* * Put the new EEH PE into hierarchy tree. 
If the parent @@ -404,10 +415,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) */ parent = eeh_pe_get_parent(edev); if (!parent) { - parent = eeh_phb_pe_get(edev->phb); + parent = eeh_phb_pe_get(pdn->phb); if (!parent) { pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", - __func__, edev->phb->global_number); + __func__, pdn->phb->global_number); edev->pe = NULL; kfree(pe); return -EEXIST; @@ -424,10 +435,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) edev->pe = pe; pr_debug("EEH: Add %04x:%02x:%02x.%01x to " "Device PE#%x, Parent PE#%x\n", - edev->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF), + pdn->phb->global_number, + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn), pe->addr, pe->parent->addr); return 0; @@ -446,13 +457,14 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; int cnt; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); if (!edev->pe) { pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n", - __func__, edev->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF)); + __func__, pdn->phb->global_number, + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn)); return -EEXIST; } @@ -712,10 +724,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) return; pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n", - __func__, edev->phb->global_number, - edev->config_addr >> 8, - PCI_SLOT(edev->config_addr & 0xFF), - PCI_FUNC(edev->config_addr & 0xFF)); + __func__, pdn->phb->global_number, + pdn->busno, + PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn)); /* Check slot status */ cap = edev->pcie_cap; diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 1ceecdda810b..797549289798 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -51,7 +51,6 @@ static ssize_t eeh_show_##_name(struct device *dev, \ static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); -EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); static ssize_t eeh_pe_state_show(struct device *dev, @@ -103,7 +102,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) return; rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state); @@ -128,7 +126,6 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) } device_remove_file(&pdev->dev, &dev_attr_eeh_mode); - device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8587059ad848..e780e1fbf6c2 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -43,6 +43,13 @@ #define LOAD_MSR_KERNEL(r, x) li r,(x) #endif +/* + * Align to 4k in order to ensure that all functions modyfing srr0/srr1 + * fit into one page in order to not encounter a TLB miss between the + * modification of srr0/srr1 and the associated rfi. 
+ */ + .align 12 + #ifdef CONFIG_BOOKE .globl mcheck_transfer_to_handler mcheck_transfer_to_handler: @@ -586,6 +593,10 @@ ppc_swapcontext: handle_page_fault: stw r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_6xx + andis. r0,r5,DSISR_DABRMATCH@h + bne- handle_dabr_fault +#endif bl do_page_fault cmpwi r3,0 beq+ ret_from_except @@ -599,6 +610,17 @@ handle_page_fault: bl bad_page_fault b ret_from_except_full +#ifdef CONFIG_6xx + /* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + clrrwi r0,r0,1 + stw r0,_TRAP(r1) + bl do_break + b ret_from_except_full +#endif + /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index e925c1c99c71..4a0fd4f40245 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -966,16 +966,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) #ifdef CONFIG_PPC_BOOK3E cmpwi cr0,r3,0x280 #else - BEGIN_FTR_SECTION - cmpwi cr0,r3,0xe80 - FTR_SECTION_ELSE - cmpwi cr0,r3,0xa00 - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) + cmpwi cr0,r3,0xa00 #endif /* CONFIG_PPC_BOOK3E */ bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; bl doorbell_exception - b ret_from_except #endif /* CONFIG_PPC_DOORBELL */ 1: b ret_from_except /* What else to do here ? */ @@ -1109,7 +1104,7 @@ _ASM_NOKPROBE_SYMBOL(__enter_rtas) _ASM_NOKPROBE_SYMBOL(rtas_return_loc) .align 3 -1: .llong rtas_restore_regs +1: .8byte rtas_restore_regs rtas_restore_regs: /* relocation is on at this point */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f14f3c04ec7e..48da0f5d2f7f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -541,7 +541,7 @@ EXC_COMMON_BEGIN(instruction_access_common) RECONCILE_IRQ_STATE(r10, r11) ld r12,_MSR(r1) ld r3,_NIP(r1) - andis. r4,r12,0x5820 + andis. r4,r12,DSISR_BAD_FAULT_64S@h li r5,0x400 std r3,_DAR(r1) std r4,_DSISR(r1) @@ -1314,7 +1314,7 @@ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) #endif -#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH) +#ifdef CONFIG_PPC_WATCHDOG #define MASKED_DEC_HANDLER_LABEL 3f @@ -1343,10 +1343,10 @@ EXC_COMMON_BEGIN(soft_nmi_common) ADD_NVGPRS;ADD_RECONCILE) b ret_from_except -#else +#else /* CONFIG_PPC_WATCHDOG */ #define MASKED_DEC_HANDLER_LABEL 2f /* normal return */ #define MASKED_DEC_HANDLER(_H) -#endif +#endif /* CONFIG_PPC_WATCHDOG */ /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -1370,19 +1370,16 @@ masked_##_H##interrupt: \ ori r10,r10,0xffff; \ mtspr SPRN_DEC,r10; \ b MASKED_DEC_HANDLER_LABEL; \ -1: cmpwi r10,PACA_IRQ_DBELL; \ - beq 2f; \ - cmpwi r10,PACA_IRQ_HMI; \ - beq 2f; \ +1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \ + bne 2f; \ mfspr r10,SPRN_##_H##SRR1; \ - rldicl r10,r10,48,1; /* clear MSR_EE */ \ - rotldi r10,r10,16; \ + xori r10,r10,MSR_EE; /* clear MSR_EE */ \ mtspr SPRN_##_H##SRR1,r10; \ 2: mtcrf 0x80,r9; \ ld r9,PACA_EXGEN+EX_R9(r13); \ ld r10,PACA_EXGEN+EX_R10(r13); \ ld r11,PACA_EXGEN+EX_R11(r13); \ - GET_SCRATCH0(r13); \ + /* returns to kernel where r13 must be set up, so don't restore it */ \ ##_H##rfid; \ b .; \ MASKED_DEC_HANDLER(_H) @@ -1485,8 +1482,10 @@ USE_TEXT_SECTION() */ .balign IFETCH_ALIGN_BYTES do_hash_page: -#ifdef CONFIG_PPC_STD_MMU_64 - andis. r0,r4,0xa450 /* weird error? 
*/ + #ifdef CONFIG_PPC_STD_MMU_64 + lis r0,DSISR_BAD_FAULT_64S@h + ori r0,r0,DSISR_BAD_FAULT_64S@l + and. r0,r4,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ @@ -1669,25 +1668,27 @@ _GLOBAL(__replay_interrupt) * we don't give a damn about, so we don't bother storing them. */ mfmsr r12 - LOAD_REG_ADDR(r11, 1f) + LOAD_REG_ADDR(r11, replay_interrupt_return) mfcr r9 ori r12,r12,MSR_EE cmpwi r3,0x900 beq decrementer_common cmpwi r3,0x500 +BEGIN_FTR_SECTION + beq h_virt_irq_common +FTR_SECTION_ELSE beq hardware_interrupt_common +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300) BEGIN_FTR_SECTION - cmpwi r3,0xe80 + cmpwi r3,0xa00 beq h_doorbell_common_msgclr - cmpwi r3,0xea0 - beq h_virt_irq_common cmpwi r3,0xe60 beq hmi_exception_common FTR_SECTION_ELSE cmpwi r3,0xa00 beq doorbell_super_common_msgclr ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -1: +replay_interrupt_return: blr _ASM_NOKPROBE_SYMBOL(__replay_interrupt) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index dc0c49cfd90a..e1431800bfb9 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -125,6 +125,13 @@ int is_fadump_boot_memory_area(u64 addr, ulong size) return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size; } +int should_fadump_crash(void) +{ + if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) + return 0; + return 1; +} + int is_fadump_active(void) { return fw_dump.dump_active; @@ -518,7 +525,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) struct fadump_crash_info_header *fdh = NULL; int old_cpu, this_cpu; - if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) + if (!should_fadump_crash()) return; /* @@ -1446,6 +1453,25 @@ static void fadump_init_files(void) return; } +static int fadump_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + /* + * If firmware-assisted dump has been registered then trigger + * firmware-assisted dump and let firmware handle everything + * else. If this returns, then fadump was not registered, so + * go through the rest of the panic path. + */ + crash_fadump(NULL, ptr); + + return NOTIFY_DONE; +} + +static struct notifier_block fadump_panic_block = { + .notifier_call = fadump_panic_event, + .priority = INT_MIN /* may not return; must be done last */ +}; + /* * Prepare for firmware-assisted dump. */ @@ -1478,6 +1504,9 @@ int __init setup_fadump(void) init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); fadump_init_files(); + atomic_notifier_chain_register(&panic_notifier_list, + &fadump_panic_block); + return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e22734278458..8c54166491e7 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -388,7 +388,7 @@ DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) - andis. r0,r10,0xa470 /* weird error? */ + andis. r0,r10,DSISR_BAD_FAULT_32S@h bne 1f /* if not, try to put a PTE */ mfspr r4,SPRN_DAR /* into the hash table */ rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ @@ -403,13 +403,13 @@ DataAccess: DO_KVM 0x400 InstructionAccess: EXCEPTION_PROLOG - andis. r0,r9,0x4000 /* no pte found? */ + andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ beq 1f /* if so, try to put a PTE */ li r3,0 /* into the hash table */ mr r4,r12 /* SRR0 is fault address */ bl hash_page 1: mr r4,r12 - mr r5,r9 + andis. 
r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 0ddc602b33a4..ff8511d6d8ea 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -92,13 +92,13 @@ END_FTR_SECTION(0, 1) .balign 8 .globl __secondary_hold_spinloop __secondary_hold_spinloop: - .llong 0x0 + .8byte 0x0 /* Secondary processors write this value with their cpu # */ /* after they enter the spin loop immediately below. */ .globl __secondary_hold_acknowledge __secondary_hold_acknowledge: - .llong 0x0 + .8byte 0x0 #ifdef CONFIG_RELOCATABLE /* This flag is set to 1 by a loader if the kernel should run @@ -650,7 +650,7 @@ __after_prom_start: bctr .balign 8 -p_end: .llong _end - copy_to_here +p_end: .8byte _end - copy_to_here 4: /* @@ -892,7 +892,7 @@ _GLOBAL(relative_toc) blr .balign 8 -p_toc: .llong __toc_start + 0x8000 - 0b +p_toc: .8byte __toc_start + 0x8000 - 0b /* * This is where the main kernel code starts. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c032fe8c2d26..4fee00d414e8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -50,18 +50,20 @@ mtspr spr, reg #endif -/* Macro to test if an address is a kernel address */ #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 -#define IS_KERNEL(tmp, addr) \ - andis. tmp, addr, 0x8000 /* Address >= 0x80000000 */ -#define BRANCH_UNLESS_KERNEL(label) beq label -#else -#define IS_KERNEL(tmp, addr) \ - rlwinm tmp, addr, 16, 16, 31; \ - cmpli cr0, tmp, PAGE_OFFSET >> 16 -#define BRANCH_UNLESS_KERNEL(label) blt label +/* By simply checking Address >= 0x80000000, we know if its a kernel address */ +#define SIMPLE_KERNEL_ADDRESS 1 #endif +/* + * We need an ITLB miss handler for kernel addresses if: + * - Either we have modules + * - Or we have not pinned the first 8M + */ +#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \ + defined(CONFIG_DEBUG_PAGEALLOC) +#define ITLB_MISS_KERNEL 1 +#endif /* * Value for the bits that have fixed value in RPN entries. @@ -123,7 +125,6 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - SYNC rfi /* enables MMU */ /* @@ -170,7 +171,7 @@ turn_on_mmu: stw r1,0(r11); \ tovirt(r1,r11); /* set new kernel sp */ \ li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - MTMSRD(r10); /* (except for mach check in rtas) */ \ + mtmsr r10; \ stw r0,GPR0(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) @@ -300,7 +301,7 @@ SystemCall: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD) + EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) . 
= 0x1100 /* @@ -325,7 +326,7 @@ SystemCall: #endif InstructionTLBMiss: -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) +#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -343,15 +344,32 @@ InstructionTLBMiss: INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mfcr r3 #endif -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) - IS_KERNEL(r11, r10) +#ifdef ITLB_MISS_KERNEL +#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) + andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ +#else + rlwinm r11, r10, 16, 0xfff8 + cmpli cr0, r11, PAGE_OFFSET@h +#ifndef CONFIG_PIN_TLB_TEXT + /* It is assumed that kernel code fits into the first 8M page */ +_ENTRY(ITLBMiss_cmp) + cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h +#endif +#endif #endif mfspr r11, SPRN_M_TW /* Get level 1 table */ -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) - BRANCH_UNLESS_KERNEL(3f) +#ifdef ITLB_MISS_KERNEL +#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) + beq+ 3f +#else + blt+ 3f +#endif +#ifndef CONFIG_PIN_TLB_TEXT + blt cr7, ITLBMissLinear +#endif lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: #endif @@ -369,7 +387,7 @@ InstructionTLBMiss: rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ 4: -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtcr r3 #endif /* Insert the APG into the TWC from the Linux PTE. */ @@ -400,7 +418,7 @@ InstructionTLBMiss: MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) +#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 @@ -447,23 +465,23 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h + rlwinm r11, r10, 16, 0xfff8 + cmpli cr0, r11, PAGE_OFFSET@h mfspr r11, SPRN_M_TW /* Get level 1 table */ blt+ 3f + rlwinm r11, r10, 16, 0xfff8 #ifndef CONFIG_PIN_TLB_IMMR - cmpli cr0, r10, VIRT_IMMR_BASE@h + cmpli cr0, r11, VIRT_IMMR_BASE@h #endif _ENTRY(DTLBMiss_cmp) - cmpli cr7, r10, (PAGE_OFFSET + 0x1800000)@h - lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha + cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h #ifndef CONFIG_PIN_TLB_IMMR _ENTRY(DTLBMiss_jmp) beq- DTLBMissIMMR #endif blt cr7, DTLBMissLinear + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -569,8 +587,8 @@ _ENTRY(DTLBMiss_jmp) InstructionTLBError: EXCEPTION_PROLOG mr r4,r12 - mr r5,r9 - andis. r10,r5,0x4000 + andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ + andis. r10,r9,SRR1_ISI_NOPT@h beq+ 1f tlbie r4 itlbie: @@ -595,7 +613,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) mfspr r4,SPRN_DAR - andis. 
+	andis.	r10,r5,DSISR_NOHPTE@h
	beq+	1f
	tlbie	r4
dtlbie:
@@ -684,7 +702,7 @@ DTLBMissLinear:
	/* Set 8M byte page and mark it valid */
	li	r11, MD_PS8MEG | MD_SVALID
	MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
-	rlwinm	r10, r10, 16, 0x0f800000	/* 8xx supports max 256Mb RAM */
+	rlwinm	r10, r10, 0, 0x0f800000		/* 8xx supports max 256Mb RAM */
	ori	r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
			  _PAGE_PRESENT
	MTSPR_CPU6(SPRN_MD_RPN, r10, r11)	/* Update TLB entry */
@@ -695,6 +713,22 @@ DTLBMissLinear:
	EXCEPTION_EPILOG_0
	rfi

+#ifndef CONFIG_PIN_TLB_TEXT
+ITLBMissLinear:
+	mtcr	r3
+	/* Set 8M byte page and mark it valid */
+	li	r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
+	MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+	rlwinm	r10, r10, 0, 0x0f800000		/* 8xx supports max 256Mb RAM */
+	ori	r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY	| \
+			  _PAGE_PRESENT
+	MTSPR_CPU6(SPRN_MI_RPN, r10, r11)	/* Update TLB entry */
+
+	mfspr	r3, SPRN_SPRG_SCRATCH2
+	EXCEPTION_EPILOG_0
+	rfi
+#endif

/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
 * by decoding the registers used by the dcbx instruction and adding them.
 * DAR is set to the calculated address.
@@ -705,9 +739,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
	mtspr	SPRN_SPRG_SCRATCH2, r10
	/* fetch instruction from memory. */
	mfspr	r10, SPRN_SRR0
-	IS_KERNEL(r11, r10)
+	rlwinm	r11, r10, 16, 0xfff8
+	cmpli	cr0, r11, PAGE_OFFSET@h
	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
-	BRANCH_UNLESS_KERNEL(3f)
+	blt+	3f
	rlwinm	r11, r10, 16, 0xfff8
_ENTRY(FixupDAR_cmp)
	cmpli	cr7, r11, (PAGE_OFFSET + 0x1800000)@h
@@ -915,10 +950,8 @@ start_here:
	rfi
/* Load up the kernel context */
2:
-	SYNC			/* Force all PTE updates to finish */
	tlbia			/* Clear all TLB entries */
	sync			/* wait for tlbia/tlbie to finish */
-	TLBSYNC			/* ... on all CPUs */

	/* set up the PTE pointers for the Abatron bdiGDB.
	*/
@@ -955,15 +988,14 @@ initial_mmu:
	mtspr	SPRN_MD_CTR, r10	/* remove PINNED DTLB entries */

	tlbia			/* Invalidate all TLB entries */
-/* Always pin the first 8 MB ITLB to prevent ITLB
   misses while mucking around with SRR0/SRR1 in asm
*/
+#ifdef CONFIG_PIN_TLB_TEXT
	lis	r8, MI_RSV4I@h
	ori	r8, r8, 0x1c00

	mtspr	SPRN_MI_CTR, r8	/* Set instruction MMU control */
+#endif

-#ifdef CONFIG_PIN_TLB
+#ifdef CONFIG_PIN_TLB_DATA
	oris	r10, r10, MD_RSV4I@h
	mtspr	SPRN_MD_CTR, r10	/* Set data TLB control */
#endif
@@ -989,6 +1021,7 @@ initial_mmu:
 * internal registers (among other things).
 */
#ifdef CONFIG_PIN_TLB_IMMR
+	oris	r10, r10, MD_RSV4I@h
	ori	r10, r10, 0x1c00
	mtspr	SPRN_MD_CTR, r10
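The DTLBMissLinear/ITLBMissLinear fix above hinges on rlwinm semantics: the instruction rotates left and then ANDs with a mask, so rotating by 16 before masking with 0x0f800000 selected bits from the wrong half of the address. A minimal C model of the instruction, illustrating why the rotate amount had to become 0 (the address value below is hypothetical):

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative model of PowerPC rlwinm: rotate left, then mask. */
	static uint32_t rlwinm(uint32_t val, unsigned sh, uint32_t mask)
	{
		uint32_t rot = sh ? ((val << sh) | (val >> (32 - sh))) : val;
		return rot & mask;
	}

	int main(void)
	{
		uint32_t epn = 0xc1234567;	/* made-up effective address */

		/* Old form: rotate 16 scrambles which bits the mask keeps. */
		printf("rot 16: %08x\n", rlwinm(epn, 16, 0x0f800000));

		/* Fixed form: rotate 0 is a pure mask keeping the 8M page
		 * bits (the 8xx supports at most 256Mb of RAM). */
		printf("rot  0: %08x\n", rlwinm(epn, 0, 0x0f800000));
		return 0;
	}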
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index e6252c5a57a4..1125c9be9e06 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -85,7 +85,61 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
	std	r3,_WORT(r1)
	mfspr	r3,SPRN_WORC
	std	r3,_WORC(r1)
+/*
+ * On POWER9, there are idle states such as stop4, invoked via cpuidle,
+ * that lose hypervisor resources. In such cases, we need to save
+ * additional SPRs before entering those idle states so that they can
+ * be restored to their older values on wakeup from the idle state.
+ *
+ * On POWER8, the only such deep idle state is winkle which is used
+ * only in the context of CPU-Hotplug, where these additional SPRs are
+ * reinitialized to a sane value. Hence there is no need to save/restore
+ * these SPRs.
+ */
+BEGIN_FTR_SECTION
	blr
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
+power9_save_additional_sprs:
+	mfspr	r3, SPRN_PID
+	mfspr	r4, SPRN_LDBAR
+	std	r3, STOP_PID(r13)
+	std	r4, STOP_LDBAR(r13)
+
+	mfspr	r3, SPRN_FSCR
+	mfspr	r4, SPRN_HFSCR
+	std	r3, STOP_FSCR(r13)
+	std	r4, STOP_HFSCR(r13)
+
+	mfspr	r3, SPRN_MMCRA
+	mfspr	r4, SPRN_MMCR1
+	std	r3, STOP_MMCRA(r13)
+	std	r4, STOP_MMCR1(r13)
+
+	mfspr	r3, SPRN_MMCR2
+	std	r3, STOP_MMCR2(r13)
+	blr
+
+power9_restore_additional_sprs:
+	ld	r3,_LPCR(r1)
+	ld	r4, STOP_PID(r13)
+	mtspr	SPRN_LPCR,r3
+	mtspr	SPRN_PID, r4
+
+	ld	r3, STOP_LDBAR(r13)
+	ld	r4, STOP_FSCR(r13)
+	mtspr	SPRN_LDBAR, r3
+	mtspr	SPRN_FSCR, r4
+
+	ld	r3, STOP_HFSCR(r13)
+	ld	r4, STOP_MMCRA(r13)
+	mtspr	SPRN_HFSCR, r3
+	mtspr	SPRN_MMCRA, r4
+	/* We have already restored PACA_MMCR0 */
+	ld	r3, STOP_MMCR1(r13)
+	ld	r4, STOP_MMCR2(r13)
+	mtspr	SPRN_MMCR1, r3
+	mtspr	SPRN_MMCR2, r4
+	blr

/*
@@ -141,7 +195,16 @@ pnv_powersave_common:
	std	r5,_CCR(r1)
	std	r1,PACAR1(r13)

+BEGIN_FTR_SECTION
+	/*
+	 * POWER9 does not require real mode to stop, and presently does not
+	 * set hwthread_state for KVM (threads don't share MMU context), so
+	 * we can remain in virtual mode for this.
+	 */
+	bctr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
	/*
+	 * POWER8
	 * Go to real mode to do the nap, as required by the architecture.
	 * Also, we need to be in real mode before setting hwthread_state,
	 * because as soon as we do that, another thread can switch
@@ -151,6 +214,20 @@ pnv_powersave_common:
	mtmsrd	r7,0
	bctr

+/*
+ * This is the sequence required to execute idle instructions, as
+ * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ */
+#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)			\
+	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\
+	std	r0,0(r1);					\
+	ptesync;						\
+	ld	r0,0(r1);					\
+236:	cmpd	cr0,r0,r0;					\
+	bne	236b;						\
+	IDLE_INST;

	.globl pnv_enter_arch207_idle_mode
pnv_enter_arch207_idle_mode:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -242,20 +319,27 @@ enter_winkle:
/*
 * r3 - PSSCR value corresponding to the requested stop state.
 */
-power_enter_stop:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/* Tell KVM we're entering idle */
+power_enter_stop_kvm_rm:
+	/*
+	 * This is currently unused because POWER9 KVM does not have to
+	 * gather secondary threads into sibling mode, but the code is
+	 * here in case that function is required.
+	 *
+	 * Tell KVM we're entering idle.
+	 */
	li	r4,KVM_HWTHREAD_IN_IDLE
	/* DO THIS IN REAL MODE!  See comment above. */
	stb	r4,HSTATE_HWTHREAD_STATE(r13)
#endif
+power_enter_stop:
	/*
	 * Check if we are executing the lite variant with ESL=EC=0
	 */
	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
	bne	 .Lhandle_esl_ec_set
-	IDLE_STATE_ENTER_SEQ(PPC_STOP)
+	PPC_STOP
	li	r3,0  /* Since we didn't lose state, return 0 */

	/*
@@ -288,7 +372,8 @@ power_enter_stop:
	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
	cmpd	r3,r4
	bge	.Lhandle_deep_stop
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+	PPC_STOP	/* Does not return (system reset interrupt) */
+
.Lhandle_deep_stop:
/*
 * Entering deep idle state.
@@ -310,7 +395,7 @@ lwarx_loop_stop:

	bl	save_sprs_to_stack

-	IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+	PPC_STOP	/* Does not return (system reset interrupt) */

/*
 * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
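The save/restore routines above keep a strict pairing: every SPR stashed by power9_save_additional_sprs is written back by power9_restore_additional_sprs. A loose C sketch of the same invariant, expressed table-driven; read_spr()/write_spr() are assumed stand-ins for mfspr/mtspr, not kernel APIs:

	#include <stdint.h>

	struct spr_slot {
		int spr;		/* SPR number (illustrative) */
		uint64_t saved;		/* value stashed across the idle state */
	};

	extern uint64_t read_spr(int spr);		/* assumed helper */
	extern void write_spr(int spr, uint64_t val);	/* assumed helper */

	static void save_sprs(struct spr_slot *t, int n)
	{
		for (int i = 0; i < n; i++)
			t[i].saved = read_spr(t[i].spr);
	}

	static void restore_sprs(const struct spr_slot *t, int n)
	{
		/* Anything the stop state lost comes back on wakeup. */
		for (int i = 0; i < n; i++)
			write_spr(t[i].spr, t[i].saved);
	}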
@@ -411,6 +496,18 @@ pnv_powersave_wakeup_mce:

	b	pnv_powersave_wakeup

+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm_start_guest_check:
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beqlr
+	b	kvm_start_guest
+#endif
+
/*
 * Called from reset vector for powersave wakeups.
 * cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -435,15 +532,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
	mr	r3,r12

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	b	kvm_start_guest
-1:
+BEGIN_FTR_SECTION
+	bl	kvm_start_guest_check
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#endif

	/* Return SRR1 from power7_nap() */
@@ -809,9 +900,16 @@ no_segments:
	mtctr	r12
	bctrl

+/*
+ * On POWER9, we can come here on wakeup from a cpuidle stop state.
+ * Hence restore the additional SPRs to the saved value.
+ *
+ * On POWER8, we come here only on winkle. Since winkle is used
+ * only in the case of CPU-Hotplug, we don't need to restore
+ * the additional SPRs.
+ */
BEGIN_FTR_SECTION
-	ld	r4,_LPCR(r1)
-	mtspr	SPRN_LPCR,r4
+	bl	power9_restore_additional_sprs
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
hypervisor_state_restored:

diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index a582e0d42525..aa9f1b8261db 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -19,6 +19,8 @@
#include <asm/pgtable.h>
#include <asm/ppc-pci.h>
#include <asm/io-workarounds.h>
+#include <asm/pte-walk.h>
+

#define IOWA_MAX_BUS 8

@@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
	 * We won't find huge pages here (iomem). Also can't hit
	 * a page table free due to init_mm
	 */
-	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
-					   NULL, &hugepage_shift);
+	ptep = find_init_mm_pte(vaddr, &hugepage_shift);
	if (ptep == NULL)
		paddr = 0;
	else {
@@ -192,7 +193,7 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,

	if (iowa_bus_count >= IOWA_MAX_BUS) {
		pr_err("IOWA:Too many pci bridges, "
-		       "workarounds disabled for %s\n", np->full_name);
+		       "workarounds disabled for %pOF\n", np);
		return;
	}

@@ -207,6 +208,6 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,

	iowa_bus_count++;

-	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+	pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np);
}

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 233ca3fe4754..af7a20dc6e09 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -127,8 +127,7 @@ static ssize_t fail_iommu_store(struct device *dev,
	return count;
}

-static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
-		   fail_iommu_store);
+static DEVICE_ATTR_RW(fail_iommu);

static int fail_iommu_bus_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
@@ -190,7 +189,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
	unsigned int pool_nr;
	struct iommu_pool *pool;

-	align_mask = 0xffffffffffffffffl >> (64 - align_order);
+	align_mask = (1ull << align_order) - 1;

	/* This allocator was derived from x86_64's bit string search */

@@ -208,7 +207,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
	 * We don't need to disable preemption here because any CPU can
	 * safely use any IOMMU pool.
	 */
-	pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
+	pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);

	if (largealloc)
		pool = &(tbl->large_pool);
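The align_mask hunk above is worth unpacking: the old expression shifts a 64-bit value right by (64 - align_order), which is a shift of 64 when align_order is 0, and shifting by the full width of the type is undefined behaviour in C. The replacement computes the same low-bit mask without that edge case. A standalone demonstration:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		for (unsigned align_order = 0; align_order <= 3; align_order++) {
			/* New form: well-defined for every order, including 0. */
			uint64_t mask = (1ull << align_order) - 1;

			/* Old form, ~0ull >> (64 - align_order), would shift
			 * by 64 for align_order == 0: undefined behaviour. */
			printf("order %u -> mask %#llx\n", align_order,
			       (unsigned long long)mask);
		}
		return 0;
	}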
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f291f7826abc..4e65bf82f5e0 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -24,7 +24,7 @@
 * mask register (of which only 16 are defined), hence the weird shifting
 * and complement of the cached_irq_mask.  I want to be able to stuff
 * this right into the SIU SMASK register.
- * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * Many of the prep/chrp functions are conditionally compiled on CONFIG_PPC_8xx
 * to reduce code space and undefined function references.
 */

@@ -143,9 +143,10 @@ notrace unsigned int __check_irq_replay(void)
	 */
	unsigned char happened = local_paca->irq_happened;

-	/* Clear bit 0 which we wouldn't clear otherwise */
-	local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
	if (happened & PACA_IRQ_HARD_DIS) {
+		/* Clear bit 0 which we wouldn't clear otherwise */
+		local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
		/*
		 * We may have missed a decrementer interrupt if hard disabled.
		 * Check the decrementer register in case we had a rollover
@@ -173,41 +174,39 @@ notrace unsigned int __check_irq_replay(void)
	 * This is a higher priority interrupt than the others, so
	 * replay it first.
	 */
-	local_paca->irq_happened &= ~PACA_IRQ_HMI;
-	if (happened & PACA_IRQ_HMI)
+	if (happened & PACA_IRQ_HMI) {
+		local_paca->irq_happened &= ~PACA_IRQ_HMI;
		return 0xe60;
+	}

-	/*
-	 * We may have missed a decrementer interrupt. We check the
-	 * decrementer itself rather than the paca irq_happened field
-	 * in case we also had a rollover while hard disabled
-	 */
-	local_paca->irq_happened &= ~PACA_IRQ_DEC;
-	if (happened & PACA_IRQ_DEC)
+	if (happened & PACA_IRQ_DEC) {
+		local_paca->irq_happened &= ~PACA_IRQ_DEC;
		return 0x900;
+	}

-	/* Finally check if an external interrupt happened */
-	local_paca->irq_happened &= ~PACA_IRQ_EE;
-	if (happened & PACA_IRQ_EE)
+	if (happened & PACA_IRQ_EE) {
+		local_paca->irq_happened &= ~PACA_IRQ_EE;
		return 0x500;
+	}

#ifdef CONFIG_PPC_BOOK3E
-	/* Finally check if an EPR external interrupt happened
-	 * this bit is typically set if we need to handle another
-	 * "edge" interrupt from within the MPIC "EPR" handler
+	/*
+	 * Check if an EPR external interrupt happened this bit is typically
+	 * set if we need to handle another "edge" interrupt from within the
+	 * MPIC "EPR" handler.
	 */
-	local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
-	if (happened & PACA_IRQ_EE_EDGE)
+	if (happened & PACA_IRQ_EE_EDGE) {
+		local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
		return 0x500;
+	}

-	local_paca->irq_happened &= ~PACA_IRQ_DBELL;
-	if (happened & PACA_IRQ_DBELL)
+	if (happened & PACA_IRQ_DBELL) {
+		local_paca->irq_happened &= ~PACA_IRQ_DBELL;
		return 0x280;
+	}
#else
-	local_paca->irq_happened &= ~PACA_IRQ_DBELL;
	if (happened & PACA_IRQ_DBELL) {
-		if (cpu_has_feature(CPU_FTR_HVMODE))
-			return 0xe80;
+		local_paca->irq_happened &= ~PACA_IRQ_DBELL;
		return 0xa00;
	}
#endif /* CONFIG_PPC_BOOK3E */
@@ -483,6 +482,18 @@ int arch_show_interrupts(struct seq_file *p, int prec)
		seq_printf(p, "  Hypervisor Maintenance Interrupts\n");
	}

+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs);
+	seq_printf(p, "  System Reset interrupts\n");
+
+#ifdef CONFIG_PPC_WATCHDOG
+	seq_printf(p, "%*s: ", prec, "WDG");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs);
+	seq_printf(p, "  Watchdog soft-NMI interrupts\n");
+#endif
+
#ifdef CONFIG_PPC_DOORBELL
	if (cpu_has_feature(CPU_FTR_DBELL)) {
		seq_printf(p, "%*s: ", prec, "DBL");
@@ -507,6 +518,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
	sum += per_cpu(irq_stat, cpu).spurious_irqs;
	sum += per_cpu(irq_stat, cpu).timer_irqs_others;
	sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+	sum += per_cpu(irq_stat, cpu).sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+	sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
+#endif
#ifdef CONFIG_PPC_DOORBELL
	sum += per_cpu(irq_stat, cpu).doorbell_irqs;
#endif
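The __check_irq_replay() rework above follows one pattern throughout: clear a latched bit only inside the branch that actually replays that interrupt, so bits for interrupts the function does not handle stay latched. A condensed sketch of the pattern with made-up flag names:

	#define IRQ_DEC	0x08
	#define IRQ_EE	0x04

	static unsigned char irq_happened;	/* stand-in for paca->irq_happened */

	static int replay_one(void)
	{
		unsigned char happened = irq_happened;

		if (happened & IRQ_DEC) {
			/* Clear only what we are about to replay... */
			irq_happened &= ~IRQ_DEC;
			return 0x900;
		}
		if (happened & IRQ_EE) {
			irq_happened &= ~IRQ_EE;
			return 0x500;
		}
		/* ...so pending bits we did not handle survive, instead of
		 * being cleared unconditionally up front as before. */
		return 0;
	}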
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index bb6f8993412e..1df6c74aa731 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -164,7 +164,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
	/* Set the global ISA io base to indicate we have an ISA bridge */
	isa_io_base = ISA_IO_BASE;

-	pr_debug("ISA bridge (early) is %s\n", np->full_name);
+	pr_debug("ISA bridge (early) is %pOF\n", np);
}

/**
@@ -187,15 +187,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
	pna = of_n_addr_cells(np);
	if (of_property_read_u32(np, "#address-cells", &na) ||
	    of_property_read_u32(np, "#size-cells", &ns)) {
-		pr_warn("ISA: Non-PCI bridge %s is missing address format\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n",
+			np);
		return;
	}

	/* Check it's a supported address format */
	if (na != 2 || ns != 1) {
-		pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n",
+			np);
		return;
	}
	rs = na + ns + pna;
@@ -203,8 +203,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
	/* Grab the ranges property */
	ranges = of_get_property(np, "ranges", &rlen);
	if (ranges == NULL || rlen < rs) {
-		pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n",
+			np);
		return;
	}

@@ -220,8 +220,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)

	/* Got something ? */
	if (!size || !pbasep) {
-		pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n",
+			np);
		return;
	}

@@ -233,15 +233,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
	/* Map pbase */
	pbase = of_translate_address(np, pbasep);
	if (pbase == OF_BAD_ADDR) {
-		pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n",
+			np);
		return;
	}

	/* We need page alignment */
	if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
-		pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n",
-			np->full_name);
+		pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n",
+			np);
		return;
	}

@@ -255,7 +255,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
	__ioremap_at(pbase, (void *)ISA_IO_BASE,
		     size, pgprot_val(pgprot_noncached(__pgprot(0))));

-	pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name);
+	pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}

/**
@@ -277,8 +277,8 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
	/* Set the global ISA io base to indicate we have an ISA bridge */
	isa_io_base = ISA_IO_BASE;

-	pr_debug("ISA bridge (late) is %s on %s\n",
-		 devnode->full_name, pci_name(pdev));
+	pr_debug("ISA bridge (late) is %pOF on %s\n",
+		 devnode, pci_name(pdev));
}

/**
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index dbf098121ce6..35e240a0a408 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -67,9 +67,9 @@ static struct hard_trap_info
#endif
#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
	{ 0x0d00, 0x05 /* SIGTRAP */ },		/* single-step */
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
	{ 0x1000, 0x04 /* SIGILL */ },		/* software emulation */
-#else /* ! CONFIG_8xx */
+#else /* ! CONFIG_PPC_8xx */
	{ 0x0f00, 0x04 /* SIGILL */ },		/* performance monitor */
	{ 0x0f20, 0x08 /* SIGFPE */ },		/* altivec unavailable */
	{ 0x1300, 0x05 /* SIGTRAP */ },		/* instruction address break */
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 1086ea37c832..9ad37f827a97 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,7 +25,6 @@
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
-#include <linux/nmi.h> /* hardlockup_detector_disable() */

#include <asm/reg.h>
#include <asm/sections.h>
@@ -719,12 +718,6 @@ static __init void kvm_free_tmp(void)

static int __init kvm_guest_init(void)
{
-	/*
-	 * The hardlockup detector is likely to get false positives in
-	 * KVM guests, so disable it by default.
-	 */
-	hardlockup_detector_disable();
-
	if (!kvm_para_available())
		goto free_tmp;

diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 97ec8557f974..6408f09dbbd9 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -181,7 +181,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
	mtctr	r4
	li	r4,0
1:
-	lwzx	r0,r0,r4
+	lwzx	r0,0,r4
	addi	r4,r4,32	/* Go to start of next cache line */
	bdnz	1b
	isync
@@ -328,7 +328,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
	mtctr	r4
	li	r4,0
1:
-	lwzx	r0,r0,r4
+	lwzx	r0,0,r4
	dcbf	0,r4
	addi	r4,r4,32	/* Go to start of next cache line */
	bdnz	1b
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 0694d20f85b6..5e5a64a8b4e4 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -147,8 +147,8 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
		legacy_serial_ports[index].serial_out = tsi_serial_out;
	}

-	printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
-	       index, np->full_name);
+	printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n",
+	       index, np);
	printk(KERN_DEBUG "  %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
	       (iotype == UPIO_PORT) ? "port" : "mem",
	       (unsigned long long)base, (unsigned long long)taddr, irq,
@@ -207,7 +207,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
	int index = -1;
	u64 taddr;

-	DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
+	DBG(" -> add_legacy_isa_port(%pOF)\n", np);

	/* Get the ISA port number */
	reg = of_get_property(np, "reg", NULL);
@@ -256,7 +256,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
	unsigned int flags;
	int iotype, index = -1, lindex = 0;

-	DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
+	DBG(" -> add_legacy_pci_port(%pOF)\n", np);

	/* We only support ports that have a clock frequency properly
	 * encoded in the device-tree (that is have an fcode). Anything
@@ -374,7 +374,7 @@ void __init find_legacy_serial_ports(void)
	if (path != NULL) {
		stdout = of_find_node_by_path(path);
		if (stdout)
-			DBG("stdout is %s\n", stdout->full_name);
+			DBG("stdout is %pOF\n", stdout);
	} else {
		DBG(" no linux,stdout-path !\n");
	}
@@ -603,7 +603,7 @@ static int __init check_legacy_serial_console(void)
		DBG(" can't find stdout package %s !\n", name);
		return -ENODEV;
	}
-	DBG("stdout is %s\n", prom_stdout->full_name);
+	DBG("stdout is %pOF\n", prom_stdout);

	name = of_get_property(prom_stdout, "name", NULL);
	if (!name) {
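Most of the conversions in these hunks are the same mechanical change: %pOF is the printk format extension that prints a struct device_node's full path, so callers stop dereferencing np->full_name themselves. A minimal sketch of the before/after shape:

	#include <linux/of.h>
	#include <linux/printk.h>

	static void report_node(struct device_node *np)
	{
		/* Before: pr_debug("node is %s\n", np->full_name); */
		pr_debug("node is %pOF\n", np);	/* vsprintf walks the node */
	}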
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index e0e131e662ed..9b2ea7e71c06 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -22,11 +22,14 @@
#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

+#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+
+#include <asm/machdep.h>
#include <asm/mce.h>

static DEFINE_PER_CPU(int, mce_nest_count);
@@ -446,3 +449,33 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
	return 0;
}
EXPORT_SYMBOL(get_mce_fault_addr);
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contain SRR0 and SRR1.
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+	long handled = 0;
+
+	__this_cpu_inc(irq_stat.mce_exceptions);
+
+	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+		handled = cur_cpu_spec->machine_check_early(regs);
+
+	return handled;
+}
+
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+	__this_cpu_inc(irq_stat.hmi_exceptions);
+
+	wait_for_subcore_guest_exit();
+
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(regs);
+
+	wait_for_tb_resync();
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 34aeac54f120..becaec990140 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -45,7 +45,7 @@ static int of_pci_phb_probe(struct platform_device *dev)
	if (ppc_md.pci_setup_phb == NULL)
		return -ENODEV;

-	pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);
+	pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node);

	/* Alloc and setup PHB data structure */
	phb = pcibios_alloc_controller(dev->dev.of_node);
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 4937bef7652f..52fc864cdec4 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -60,10 +60,6 @@ optprobe_template_entry:
	std	r5,_CCR(r1)
	lbz	r5,PACASOFTIRQEN(r13)
	std	r5,SOFTE(r1)
-	mfdar	r5
-	std	r5,_DAR(r1)
-	mfdsisr	r5
-	std	r5,_DSISR(r1)

	/*
	 * We may get here from a module, so load the kernel TOC in r2.
@@ -122,10 +118,6 @@ optprobe_template_call_emulate:
	mtxer	r5
	ld	r5,_CCR(r1)
	mtcr	r5
-	ld	r5,_DAR(r1)
-	mtdar	r5
-	ld	r5,_DSISR(r1)
-	mtdsisr	r5
	REST_GPR(0,r1)
	REST_10GPRS(2,r1)
	REST_10GPRS(12,r1)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 8d63627e067f..70f073d6c3b2 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -99,18 +99,27 @@ static inline void free_lppacas(void) { }
 * If you make the number of persistent SLB entries dynamic, please also
 * update PR KVM to flush and restore them accordingly.
 */
-static struct slb_shadow *slb_shadow;
+static struct slb_shadow * __initdata slb_shadow;

static void __init allocate_slb_shadows(int nr_cpus, int limit)
{
	int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
+
+	if (early_radix_enabled())
+		return;
+
	slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
	memset(slb_shadow, 0, size);
}

static struct slb_shadow * __init init_slb_shadow(int cpu)
{
-	struct slb_shadow *s = &slb_shadow[cpu];
+	struct slb_shadow *s;
+
+	if (early_radix_enabled())
+		return NULL;
+
+	s = &slb_shadow[cpu];

	/*
	 * When we come through here to initialise boot_paca, the slb_shadow
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 341a7469cab8..02831a396419 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -373,9 +373,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
		if (virq)
			irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
	} else {
-		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
-			 oirq.args_count, oirq.args[0], oirq.args[1],
-			 of_node_full_name(oirq.np));
+		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n",
+			 oirq.args_count, oirq.args[0], oirq.args[1], oirq.np);

		virq = irq_create_of_mapping(&oirq);
	}
@@ -741,8 +740,8 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
	struct of_pci_range range;
	struct of_pci_range_parser parser;

-	printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
-	       dev->full_name, primary ? "(primary)" : "");
"(primary)" : ""); + printk(KERN_INFO "PCI host bridge %pOF %s ranges:\n", + dev, primary ? "(primary)" : ""); /* Check for ranges property */ if (of_pci_range_parser_init(&parser, dev)) @@ -1556,8 +1555,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, if (!res->flags) { pr_debug("PCI: I/O resource not set for host" - " bridge %s (domain %d)\n", - hose->dn->full_name, hose->global_number); + " bridge %pOF (domain %d)\n", + hose->dn, hose->global_number); } else { offset = pcibios_io_space_offset(hose); @@ -1668,7 +1667,7 @@ void pcibios_scan_phb(struct pci_controller *hose) struct device_node *node = hose->dn; int mode; - pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node)); + pr_debug("PCI: Scanning PHB %pOF\n", node); /* Get some IO space for the new PHB */ pcibios_setup_phb_io_space(hose); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 41c86c6b6e4d..1d817f4d97d9 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -79,8 +79,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus) return; bus_range = of_get_property(node, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %s, " - "assuming it starts at 0\n", node->full_name); + printk(KERN_WARNING "Can't get bus-range for %pOF, " + "assuming it starts at 0\n", node); pci_to_OF_bus_map[pci_bus] = 0; } else pci_to_OF_bus_map[pci_bus] = bus_range[0]; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index ed5e9ff61a68..932b9741aa8f 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -111,7 +111,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus) if (hose->io_base_alloc == NULL) return 0; - pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name); + pr_debug("IO unmapping for PHB %pOF\n", hose->dn); pr_debug(" alloc=0x%p\n", hose->io_base_alloc); /* This is a PHB, we fully unmap the IO area */ @@ -151,7 +151,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) hose->io_base_virt = (void __iomem *)(area->addr + hose->io_base_phys - phys_page); - pr_debug("IO mapping for PHB %s\n", hose->dn->full_name); + pr_debug("IO mapping for PHB %pOF\n", hose->dn); pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n", hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc); pr_debug(" size=0x%016llx (alloc=0x%016lx)\n", diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 592693437070..0e395afbf0f4 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -139,7 +139,6 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, - struct pci_dev *pdev, int vf_index, int busno, int devfn) { @@ -150,10 +149,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, return NULL; pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); - if (!pdn) { - dev_warn(&pdev->dev, "%s: Out of memory!\n", __func__); + if (!pdn) return NULL; - } pdn->phb = parent->phb; pdn->parent = parent; @@ -167,13 +164,6 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, INIT_LIST_HEAD(&pdn->list); list_add_tail(&pdn->list, &parent->child_list); - /* - * If we already have PCI device instance, lets - * bind them. 
-	 */
-	if (pdev)
-		pdev->dev.archdata.pci_data = pdn;
-
	return pdn;
}
#endif

@@ -201,7 +191,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
		struct eeh_dev *edev __maybe_unused;

-		pdn = add_one_dev_pci_data(parent, NULL, i,
+		pdn = add_one_dev_pci_data(parent, i,
					   pci_iov_virtfn_bus(pdev, i),
					   pci_iov_virtfn_devfn(pdev, i));
		if (!pdn) {
@@ -303,7 +293,6 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
	if (pdn == NULL)
		return NULL;
	dn->data = pdn;
-	pdn->node = dn;
	pdn->phb = hose;
#ifdef CONFIG_PPC_POWERNV
	pdn->pe_number = IODA_INVALID_PE;
@@ -352,6 +341,7 @@ EXPORT_SYMBOL_GPL(pci_add_device_node_info);
void pci_remove_device_node_info(struct device_node *dn)
{
	struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+	struct device_node *parent;
#ifdef CONFIG_EEH
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

@@ -364,8 +354,10 @@ void pci_remove_device_node_info(struct device_node *dn)

	WARN_ON(!list_empty(&pdn->child_list));
	list_del(&pdn->list);
-	if (pdn->parent)
-		of_node_put(pdn->parent->node);
+
+	parent = of_get_parent(dn);
+	if (parent)
+		of_node_put(parent);

	dn->data = NULL;
	kfree(pdn);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index ea3d98115b88..0d790f8432d2 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -211,19 +211,19 @@ void of_scan_pci_bridge(struct pci_dev *dev)
	unsigned int flags;
	u64 size;

-	pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+	pr_debug("of_scan_pci_bridge(%pOF)\n", node);

	/* parse bus-range property */
	busrange = of_get_property(node, "bus-range", &len);
	if (busrange == NULL || len != 8) {
-		printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
-		       node->full_name);
+		printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %pOF\n",
+		       node);
		return;
	}
	ranges = of_get_property(node, "ranges", &len);
	if (ranges == NULL) {
-		printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
-		       node->full_name);
+		printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %pOF\n",
+		       node);
		return;
	}

@@ -233,8 +233,8 @@ void of_scan_pci_bridge(struct pci_dev *dev)
		bus = pci_add_new_bus(dev->bus, dev,
				      of_read_number(busrange, 1));
		if (!bus) {
-			printk(KERN_ERR "Failed to create pci bus for %s\n",
-			       node->full_name);
+			printk(KERN_ERR "Failed to create pci bus for %pOF\n",
+			       node);
			return;
		}
	}
@@ -262,13 +262,13 @@ void of_scan_pci_bridge(struct pci_dev *dev)
			res = bus->resource[0];
			if (res->flags) {
				printk(KERN_ERR "PCI: ignoring extra I/O range"
-				       " for bridge %s\n", node->full_name);
+				       " for bridge %pOF\n", node);
				continue;
			}
		} else {
			if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
				printk(KERN_ERR "PCI: too many memory ranges"
-				       " for bridge %s\n", node->full_name);
+				       " for bridge %pOF\n", node);
				continue;
			}
			res = bus->resource[i];
@@ -307,7 +307,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
	struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn));
#endif

-	pr_debug("  * %s\n", dn->full_name);
+	pr_debug("  * %pOF\n", dn);
	if (!of_device_is_available(dn))
		return NULL;

@@ -350,8 +350,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
	struct device_node *child;
	struct pci_dev *dev;

-	pr_debug("of_scan_bus(%s) bus no %d...\n",
-		 node->full_name, bus->number);
+	pr_debug("of_scan_bus(%pOF) bus no %d...\n",
+		 node, bus->number);

	/* Scan direct children */
	for_each_child_of_node(node, child) {
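The pci_remove_device_node_info() hunk above replaces a cached parent pointer with an explicit of_get_parent() call at removal time. The underlying rule it relies on is device-tree refcounting: every of_get_parent() returns the parent with its reference count raised and must be balanced by of_node_put(). A minimal sketch of that pairing:

	#include <linux/of.h>

	static void touch_parent(struct device_node *dn)
	{
		struct device_node *parent;

		parent = of_get_parent(dn);	/* takes a reference */
		if (!parent)
			return;
		/* ... use parent here ... */
		of_node_put(parent);		/* balance the reference */
	}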
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1f0fd361e09b..a0c74bbf3454 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -230,7 +230,8 @@ void enable_kernel_fp(void)
}
EXPORT_SYMBOL(enable_kernel_fp);

-static int restore_fp(struct task_struct *tsk) {
+static int restore_fp(struct task_struct *tsk)
+{
	if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
		load_fp_state(&current->thread.fp_state);
		current->thread.load_fp++;
@@ -330,11 +331,19 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#ifdef CONFIG_VSX
static void __giveup_vsx(struct task_struct *tsk)
{
-	if (tsk->thread.regs->msr & MSR_FP)
+	unsigned long msr = tsk->thread.regs->msr;
+
+	/*
+	 * We should never be setting MSR_VSX without also setting
+	 * MSR_FP and MSR_VEC
+	 */
+	WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
+
+	/* __giveup_fpu will clear MSR_VSX */
+	if (msr & MSR_FP)
		__giveup_fpu(tsk);
-	if (tsk->thread.regs->msr & MSR_VEC)
+	if (msr & MSR_VEC)
		__giveup_altivec(tsk);
-	tsk->thread.regs->msr &= ~MSR_VSX;
}

static void giveup_vsx(struct task_struct *tsk)
@@ -346,14 +355,6 @@ static void giveup_vsx(struct task_struct *tsk)
	msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
}

-static void save_vsx(struct task_struct *tsk)
-{
-	if (tsk->thread.regs->msr & MSR_FP)
-		save_fpu(tsk);
-	if (tsk->thread.regs->msr & MSR_VEC)
-		save_altivec(tsk);
-}
-
void enable_kernel_vsx(void)
{
	unsigned long cpumsr;
@@ -374,10 +375,6 @@ void enable_kernel_vsx(void)
		 */
		if(!msr_tm_active(cpumsr) &&
		   msr_tm_active(current->thread.regs->msr))
			return;
-		if (current->thread.regs->msr & MSR_FP)
-			__giveup_fpu(current);
-		if (current->thread.regs->msr & MSR_VEC)
-			__giveup_altivec(current);
		__giveup_vsx(current);
	}
}
@@ -407,7 +404,6 @@ static int restore_vsx(struct task_struct *tsk)
}
#else
static inline int restore_vsx(struct task_struct *tsk) { return 0; }
-static inline void save_vsx(struct task_struct *tsk) { }
#endif /* CONFIG_VSX */

#ifdef CONFIG_SPE
@@ -487,6 +483,8 @@ void giveup_all(struct task_struct *tsk)
	msr_check_and_set(msr_all_available);
	check_if_tm_restore_required(tsk);

+	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
#ifdef CONFIG_PPC_FPU
	if (usermsr & MSR_FP)
		__giveup_fpu(tsk);
@@ -495,10 +493,6 @@ void giveup_all(struct task_struct *tsk)
	if (usermsr & MSR_VEC)
		__giveup_altivec(tsk);
#endif
-#ifdef CONFIG_VSX
-	if (usermsr & MSR_VSX)
-		__giveup_vsx(tsk);
-#endif
#ifdef CONFIG_SPE
	if (usermsr & MSR_SPE)
		__giveup_spe(tsk);
@@ -553,19 +547,13 @@ void save_all(struct task_struct *tsk)

	msr_check_and_set(msr_all_available);

-	/*
-	 * Saving the way the register space is in hardware, save_vsx boils
-	 * down to a save_fpu() and save_altivec()
-	 */
-	if (usermsr & MSR_VSX) {
-		save_vsx(tsk);
-	} else {
-		if (usermsr & MSR_FP)
-			save_fpu(tsk);
+	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));

-		if (usermsr & MSR_VEC)
-			save_altivec(tsk);
-	}
+	if (usermsr & MSR_FP)
+		save_fpu(tsk);
+
+	if (usermsr & MSR_VEC)
+		save_altivec(tsk);

	if (usermsr & MSR_SPE)
		__giveup_spe(tsk);
@@ -1392,13 +1380,13 @@ void show_regs(struct pt_regs * regs)

	show_regs_print_info(KERN_DEFAULT);

-	printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+	printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
	       regs->nip, regs->link, regs->ctr);
	printk("REGS: %p TRAP: %04lx %s (%s)\n",
	       regs, regs->trap, print_tainted(), init_utsname()->release);
-	printk("MSR: "REG" ", regs->msr);
+	printk("MSR: "REG" ", regs->msr);
	print_msr_bits(regs->msr);
-	printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
+	pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
	trap = TRAP(regs);
	if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
		pr_cont("CFAR: "REG" ", regs->orig_gpr3);
@@ -1991,11 +1979,25 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
void notrace __ppc64_runlatch_on(void)
{
	struct thread_info *ti = current_thread_info();
-	unsigned long ctrl;

-	ctrl = mfspr(SPRN_CTRLF);
-	ctrl |= CTRL_RUNLATCH;
-	mtspr(SPRN_CTRLT, ctrl);
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		/*
+		 * Least significant bit (RUN) is the only writable bit of
+		 * the CTRL register, so we can avoid mfspr. 2.06 is not the
+		 * earliest ISA where this is the case, but it's convenient.
+		 */
+		mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
+	} else {
+		unsigned long ctrl;
+
+		/*
+		 * Some architectures (e.g., Cell) have writable fields other
+		 * than RUN, so do the read-modify-write.
+		 */
+		ctrl = mfspr(SPRN_CTRLF);
+		ctrl |= CTRL_RUNLATCH;
+		mtspr(SPRN_CTRLT, ctrl);
+	}

	ti->local_flags |= _TLF_RUNLATCH;
}
@@ -2004,13 +2006,18 @@ void notrace __ppc64_runlatch_on(void)
void notrace __ppc64_runlatch_off(void)
{
	struct thread_info *ti = current_thread_info();
-	unsigned long ctrl;

	ti->local_flags &= ~_TLF_RUNLATCH;

-	ctrl = mfspr(SPRN_CTRLF);
-	ctrl &= ~CTRL_RUNLATCH;
-	mtspr(SPRN_CTRLT, ctrl);
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		mtspr(SPRN_CTRLT, 0);
+	} else {
+		unsigned long ctrl;
+
+		ctrl = mfspr(SPRN_CTRLF);
+		ctrl &= ~CTRL_RUNLATCH;
+		mtspr(SPRN_CTRLT, ctrl);
+	}
}
#endif /* CONFIG_PPC64 */
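The process.c hunks above enforce one invariant in several places: MSR_VSX may only be set when MSR_FP and MSR_VEC are both set, since the VSX register file is a superset of the FP and VMX state. Distilled into a standalone predicate (the bit positions match the usual powerpc definitions but are shown here only for illustration):

	#include <stdbool.h>

	#define MSR_FP	(1ul << 13)	/* illustrative bit positions */
	#define MSR_VEC	(1ul << 25)
	#define MSR_VSX	(1ul << 23)

	/* VSX set implies FP and VEC set; anything else warrants a WARN_ON. */
	static bool msr_vsx_consistent(unsigned long msr)
	{
		return !(msr & MSR_VSX) ||
		       ((msr & MSR_FP) && (msr & MSR_VEC));
	}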
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 613f79f03877..02190e90c7ae 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -177,6 +177,7 @@ struct platform_support {
	bool hash_mmu;
	bool radix_mmu;
	bool radix_gtse;
+	bool xive;
};

/* Platforms codes are now obsolete in the kernel. Now only used within this
@@ -1041,6 +1042,27 @@ static void __init prom_parse_mmu_model(u8 val,
	}
}

+static void __init prom_parse_xive_model(u8 val,
+					 struct platform_support *support)
+{
+	switch (val) {
+	case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
+		prom_debug("XIVE - either mode supported\n");
+		support->xive = true;
+		break;
+	case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
+		prom_debug("XIVE - exploitation mode supported\n");
+		support->xive = true;
+		break;
+	case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
+		prom_debug("XIVE - legacy mode supported\n");
+		break;
+	default:
+		prom_debug("Unknown xive support option: 0x%x\n", val);
+		break;
+	}
+}
+
static void __init prom_parse_platform_support(u8 index, u8 val,
					       struct platform_support *support)
{
@@ -1054,6 +1076,10 @@ static void __init prom_parse_platform_support(u8 index, u8 val,
			support->radix_gtse = true;
		}
		break;
+	case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
+		prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
+				      support);
+		break;
	}
}

@@ -1062,7 +1088,8 @@ static void __init prom_check_platform_support(void)
	struct platform_support supported = {
		.hash_mmu = false,
		.radix_mmu = false,
-		.radix_gtse = false
+		.radix_gtse = false,
+		.xive = false
	};
	int prop_len = prom_getproplen(prom.chosen,
				       "ibm,arch-vec-5-platform-support");
@@ -1095,6 +1122,11 @@ static void __init prom_check_platform_support(void)
		/* We're probably on a legacy hypervisor */
		prom_debug("Assuming legacy hash support\n");
	}
+
+	if (supported.xive) {
+		prom_debug("Asking for XIVE\n");
+		ibm_architecture_vec.vec5.intarch = OV5_FEAT(OV5_XIVE_EXPLOIT);
+	}
}

static void __init prom_send_capabilities(void)
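For context on how prom_parse_platform_support() gets its (index, value) arguments: the ibm,arch-vec-5-platform-support property is, as I understand it, a flat list of two-byte option pairs, and the caller walks it pairwise. A sketch of that loop under that assumption (the helper name parse_vec5_support is hypothetical):

	#include <stdint.h>

	struct platform_support;
	void prom_parse_platform_support(uint8_t index, uint8_t val,
					 struct platform_support *support);

	static void parse_vec5_support(const uint8_t *prop, int len,
				       struct platform_support *support)
	{
		/* Each option is two bytes: which OV5 byte, then the bits. */
		for (int i = 0; i + 1 < len; i += 2)
			prom_parse_platform_support(prop[i], prop[i + 1],
						    support);
	}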
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 660ed39e9c9a..07cd22e35405 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1594,11 +1594,8 @@ static int ppr_get(struct task_struct *target,
		      unsigned int pos, unsigned int count,
		      void *kbuf, void __user *ubuf)
{
-	int ret;
-
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.ppr, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.ppr, 0, sizeof(u64));
}

static int ppr_set(struct task_struct *target,
@@ -1606,11 +1603,8 @@ static int ppr_set(struct task_struct *target,
		      unsigned int pos, unsigned int count,
		      const void *kbuf, const void __user *ubuf)
{
-	int ret;
-
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.ppr, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.ppr, 0, sizeof(u64));
}

static int dscr_get(struct task_struct *target,
@@ -1618,22 +1612,16 @@ static int dscr_get(struct task_struct *target,
		      unsigned int pos, unsigned int count,
		      void *kbuf, void __user *ubuf)
{
-	int ret;
-
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.dscr, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.dscr, 0, sizeof(u64));
}
static int dscr_set(struct task_struct *target,
		      const struct user_regset *regset,
		      unsigned int pos, unsigned int count,
		      const void *kbuf, const void __user *ubuf)
{
-	int ret;
-
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.dscr, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.dscr, 0, sizeof(u64));
}
#endif
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1642,22 +1630,16 @@ static int tar_get(struct task_struct *target,
		      unsigned int pos, unsigned int count,
		      void *kbuf, void __user *ubuf)
{
-	int ret;
-
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.tar, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.tar, 0, sizeof(u64));
}
static int tar_set(struct task_struct *target,
		      const struct user_regset *regset,
		      unsigned int pos, unsigned int count,
		      const void *kbuf, const void __user *ubuf)
{
-	int ret;
-
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.tar, 0, sizeof(u64));
-	return ret;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tar, 0, sizeof(u64));
}

static int ebb_active(struct task_struct *target,
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index d88736fbece6..e8cfc69f59ae 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -82,7 +82,7 @@ _GLOBAL(relocate)
6:	blr

.balign 8
-p_dyn:	.llong	__dynamic_start - 0b
-p_rela:	.llong	__rela_dyn_start - 0b
-p_st:	.llong	_stext - 0b
+p_dyn:	.8byte	__dynamic_start - 0b
+p_rela:	.8byte	__rela_dyn_start - 0b
+p_st:	.8byte	_stext - 0b

diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 73f1934582c2..c2b148b1634a 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -91,26 +91,14 @@ static int rtas_pci_read_config(struct pci_bus *bus,
				unsigned int devfn,
				int where, int size, u32 *val)
{
-	struct device_node *busdn, *dn;
	struct pci_dn *pdn;
-	bool found = false;
	int ret;

-	/* Search only direct children of the bus */
	*val = 0xFFFFFFFF;
-	busdn = pci_bus_to_OF_node(bus);
-	for (dn = busdn->child; dn; dn = dn->sibling) {
-		pdn = PCI_DN(dn);
-		if (pdn && pdn->devfn == devfn
-		    && of_device_is_available(dn)) {
-			found = true;
-			break;
-		}
-	}

-	if (!found)
-		return PCIBIOS_DEVICE_NOT_FOUND;
+	pdn = pci_get_pdn_by_devfn(bus, devfn);

+	/* Validity of pdn is checked in here */
	ret = rtas_read_config(pdn, where, size, val);
	if (*val == EEH_IO_ERROR_VALUE(size) &&
	    eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
@@ -153,24 +141,11 @@ static int rtas_pci_write_config(struct pci_bus *bus,
				 unsigned int devfn,
				 int where, int size, u32 val)
{
-	struct device_node *busdn, *dn;
	struct pci_dn *pdn;
-	bool found = false;
-
-	/* Search only direct children of the bus */
-	busdn = pci_bus_to_OF_node(bus);
-	for (dn = busdn->child; dn; dn = dn->sibling) {
-		pdn = PCI_DN(dn);
-		if (pdn && pdn->devfn == devfn
-		    && of_device_is_available(dn)) {
-			found = true;
-			break;
-		}
-	}

-	if (!found)
-		return PCIBIOS_DEVICE_NOT_FOUND;
+	pdn = pci_get_pdn_by_devfn(bus, devfn);

+	/* Validity of pdn is checked in here. */
	return rtas_write_config(pdn, where, size, val);
}

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 94a948207cd2..7de73589d8e2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -481,7 +481,7 @@ void __init smp_setup_cpu_maps(void)
		__be32 cpu_be;
		int j, len;

-		DBG("  * %s...\n", dn->full_name);
+		DBG("  * %pOF...\n", dn);

		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
				&len);
@@ -704,30 +704,6 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);

-static int ppc_panic_event(struct notifier_block *this,
-                             unsigned long event, void *ptr)
-{
-	/*
-	 * If firmware-assisted dump has been registered then trigger
-	 * firmware-assisted dump and let firmware handle everything else.
-	 */
-	crash_fadump(NULL, ptr);
-	ppc_md.panic(ptr);  /* May not return */
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_panic_block = {
-	.notifier_call = ppc_panic_event,
-	.priority = INT_MIN /* may not return; must be done last */
-};
-
-void __init setup_panic(void)
-{
-	if (!ppc_md.panic)
-		return;
-	atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
-}
-
#ifdef CONFIG_CHECK_CACHE_COHERENCY
/*
 * For platforms that have configurable cache-coherency.  This function
@@ -872,9 +848,6 @@ void __init setup_arch(char **cmdline_p)
	/* Probe the machine type, establish ppc_md. */
	probe_machine();

-	/* Setup panic notifier if requested by the platform. */
-	setup_panic();
-
	/*
	 * Configure ppc_md.power_save (ppc32 only, 64-bit machines do
	 * it from their respective probe() function.
@@ -916,13 +889,6 @@ void __init setup_arch(char **cmdline_p)
	/* Reserve large chunks of memory for use by CMA for KVM. */
	kvm_cma_reserve();

-	/*
-	 * Reserve any gigantic pages requested on the command line.
-	 * memblock needs to have been initialized by the time this is
-	 * called since this will reserve memory.
-	 */
-	reserve_hugetlb_gpages();
-
	klp_init_thread_info(&init_thread_info);

	init_mm.start_code = (unsigned long)_stext;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 2f88f6cf1a42..51ebc01fff52 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -98,6 +98,9 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */

notrace void __init machine_init(u64 dt_ptr)
{
+	unsigned int *addr = &memset_nocache_branch;
+	unsigned long insn;
+
	/* Configure static keys first, now that we're relocated. */
	setup_feature_keys();

@@ -105,7 +108,9 @@ notrace void __init machine_init(u64 dt_ptr)
	udbg_early_init();

	patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP);
-	patch_instruction(&memset_nocache_branch, PPC_INST_NOP);
+
+	insn = create_cond_branch(addr, branch_target(addr), 0x820000);
+	patch_instruction(addr, insn);	/* replace b by bne cr0 */

	/* Do some early initialization based on the flat device tree */
	early_init_devtree(__va(dt_ptr));
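The magic 0x820000 passed to create_cond_branch() above is the BO/BI portion of a bc instruction, which is why the comment can claim the patched branch behaves as "bne cr0". A quick decode of those fields, following the standard Book I branch encoding:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t flags = 0x820000;
		uint32_t bo = (flags >> 21) & 0x1f;	/* branch option */
		uint32_t bi = (flags >> 16) & 0x1f;	/* CR bit to test */

		/* BO=4 means "branch if CR bit is 0", BI=2 selects CR0[EQ],
		 * so the patched instruction is effectively bne cr0. */
		printf("BO=%u BI=%u\n", bo, bi);
		return 0;
	}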
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index af23d4b576ec..b89c6aac48c9 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -564,6 +564,9 @@ static __init u64 safe_stack_limit(void)
	/* Other BookE, we assume the first GB is bolted */
	return 1ul << 30;
#else
+	if (early_radix_enabled())
+		return ULONG_MAX;
+
	/* BookS, the first segment is bolted */
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		return 1UL << SID_SHIFT_1T;
@@ -578,7 +581,8 @@ void __init irqstack_early_init(void)

	/*
	 * Interrupt stacks must be in the first segment since we
	 * cannot afford to take SLB misses on them. They are not
	 * accessed in realmode.
	 */
	for_each_possible_cpu(i) {
		softirq_ctx[i] = (struct thread_info *)
@@ -649,8 +653,9 @@ void __init emergency_stack_init(void)
	 * aligned.
	 *
	 * Since we use these as temporary stacks during secondary CPU
	 * bringup, machine check, system reset, and HMI, we need to get
	 * at them in real mode. This means they must also be within the RMO
	 * region.
	 *
	 * The IRQ stacks allocated elsewhere in this file are zeroed and
	 * initialized in kernel/irq.c. These are initialized here in order
@@ -751,3 +756,31 @@ unsigned long memory_block_size_bytes(void)
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+	return ppc_proc_freq * watchdog_thresh;
+}
+#endif
+
+/*
+ * The perf based hardlockup detector breaks PMU event based branches, so
+ * disable it by default. Book3S has a soft-nmi hardlockup detector based
+ * on the decrementer interrupt, so it does not suffer from this problem.
+ *
+ * It is likely to get false positives in VM guests, so disable it there
+ * by default too.
+ */
+static int __init disable_hardlockup_detector(void)
+{
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+	hardlockup_detector_disable();
+#else
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		hardlockup_detector_disable();
+#endif
+
+	return 0;
+}
+early_initcall(disable_hardlockup_detector);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8d3320562c70..e0a4c1f82e25 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -75,9 +75,11 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
struct thread_info *secondary_ti;

DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* SMP operations for this machine */
@@ -571,6 +573,26 @@ static void smp_store_cpu_info(int id)
#endif
}

+/*
+ * Relationships between CPUs are maintained in a set of per-cpu cpumasks.
+ * So, rather than just passing around the cpumask, we pass around a function
+ * that returns that cpumask for the given CPU.
+ */
+static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
+{
+	cpumask_set_cpu(i, get_cpumask(j));
+	cpumask_set_cpu(j, get_cpumask(i));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void set_cpus_unrelated(int i, int j,
+			       struct cpumask *(*get_cpumask)(int))
+{
+	cpumask_clear_cpu(i, get_cpumask(j));
+	cpumask_clear_cpu(j, get_cpumask(i));
+}
+#endif
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int cpu;
@@ -590,6 +612,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
	for_each_possible_cpu(cpu) {
		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
+		zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
+					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		/*
@@ -602,7 +626,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
		}
	}

+	/* Init the cpumasks so the boot CPU is related to itself */
	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));

	if (smp_ops && smp_ops->probe)
@@ -828,33 +854,6 @@ int cpu_first_thread_of_core(int core)
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);

-static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
-{
-	const struct cpumask *mask;
-	struct device_node *np;
-	int i, plen;
-	const __be32 *prop;
-
-	mask = add ? cpu_online_mask : cpu_present_mask;
-	for_each_cpu(i, mask) {
-		np = of_get_cpu_node(i, NULL);
-		if (!np)
-			continue;
-		prop = of_get_property(np, "ibm,chip-id", &plen);
-		if (prop && plen == sizeof(int) &&
-		    of_read_number(prop, 1) == chipid) {
-			if (add) {
-				cpumask_set_cpu(cpu, cpu_core_mask(i));
-				cpumask_set_cpu(i, cpu_core_mask(cpu));
-			} else {
-				cpumask_clear_cpu(cpu, cpu_core_mask(i));
-				cpumask_clear_cpu(i, cpu_core_mask(cpu));
-			}
-		}
-		of_node_put(np);
-	}
-}
-
/* Must be called when no change can occur to cpu_present_mask,
 * i.e. during cpu online or offline.
 */
@@ -877,52 +876,93 @@ static struct device_node *cpu_to_l2cache(int cpu)
	return cache;
}

-static void traverse_core_siblings(int cpu, bool add)
+static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
{
	struct device_node *l2_cache, *np;
-	const struct cpumask *mask;
-	int i, chip, plen;
-	const __be32 *prop;
-
-	/* First see if we have ibm,chip-id properties in cpu nodes */
-	np = of_get_cpu_node(cpu, NULL);
-	if (np) {
-		chip = -1;
-		prop = of_get_property(np, "ibm,chip-id", &plen);
-		if (prop && plen == sizeof(int))
-			chip = of_read_number(prop, 1);
-		of_node_put(np);
-		if (chip >= 0) {
-			traverse_siblings_chip_id(cpu, add, chip);
-			return;
-		}
-	}
+	int i;

	l2_cache = cpu_to_l2cache(cpu);
-	mask = add ? cpu_online_mask : cpu_present_mask;
-	for_each_cpu(i, mask) {
+	if (!l2_cache)
+		return false;
+
+	for_each_cpu(i, cpu_online_mask) {
+		/*
+		 * when updating the masks, the current CPU has not yet been
+		 * marked online, but we need to update the cache masks
+		 */
		np = cpu_to_l2cache(i);
		if (!np)
			continue;
-		if (np == l2_cache) {
-			if (add) {
-				cpumask_set_cpu(cpu, cpu_core_mask(i));
-				cpumask_set_cpu(i, cpu_core_mask(cpu));
-			} else {
-				cpumask_clear_cpu(cpu, cpu_core_mask(i));
-				cpumask_clear_cpu(i, cpu_core_mask(cpu));
-			}
-		}
+
+		if (np == l2_cache)
+			set_cpus_related(cpu, i, mask_fn);
+
		of_node_put(np);
	}
	of_node_put(l2_cache);
+
+	return true;
}

+#ifdef CONFIG_HOTPLUG_CPU
+static void remove_cpu_from_masks(int cpu)
+{
+	int i;
+
+	/* NB: cpu_core_mask is a superset of the others */
+	for_each_cpu(i, cpu_core_mask(cpu)) {
+		set_cpus_unrelated(cpu, i, cpu_core_mask);
+		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
+		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+	}
+}
+#endif
+
+static void add_cpu_to_masks(int cpu)
+{
+	int first_thread = cpu_first_thread_sibling(cpu);
+	int chipid = cpu_to_chip_id(cpu);
+	int i;
+
+	/*
+	 * This CPU will not be in the online mask yet so we need to manually
+	 * add it to its own thread sibling mask.
+	 */
+	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++)
+		if (cpu_online(i))
+			set_cpus_related(i, cpu, cpu_sibling_mask);
+
+	/*
+	 * Copy the thread sibling mask into the cache sibling mask
+	 * and mark any CPUs that share an L2 with this CPU.
+	 */
+	for_each_cpu(i, cpu_sibling_mask(cpu))
+		set_cpus_related(cpu, i, cpu_l2_cache_mask);
+	update_mask_by_l2(cpu, cpu_l2_cache_mask);
+
+	/*
+	 * Copy the cache sibling mask into core sibling mask and mark
+	 * any CPUs on the same chip as this CPU.
+	 */
+	for_each_cpu(i, cpu_l2_cache_mask(cpu))
+		set_cpus_related(cpu, i, cpu_core_mask);
+
+	if (chipid == -1)
+		return;
+
+	for_each_cpu(i, cpu_online_mask)
+		if (cpu_to_chip_id(i) == chipid)
+			set_cpus_related(cpu, i, cpu_core_mask);
+}
+
+static bool shared_caches;
+
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
	unsigned int cpu = smp_processor_id();
-	int i, base;

	mmgrab(&init_mm);
	current->active_mm = &init_mm;
@@ -945,22 +985,15 @@ void start_secondary(void *unused)
	vdso_getcpu_init();
#endif

-	/* Update sibling maps */
-	base = cpu_first_thread_sibling(cpu);
-	for (i = 0; i < threads_per_core; i++) {
-		if (cpu_is_offline(base + i) && (cpu != base + i))
-			continue;
-		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
-		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
+	/* Update topology CPU masks */
+	add_cpu_to_masks(cpu);

-		/* cpu_core_map should be a superset of
-		 * cpu_sibling_map even if we don't have cache
-		 * information, so update the former here, too.
-		 */
-		cpumask_set_cpu(cpu, cpu_core_mask(base + i));
-		cpumask_set_cpu(base + i, cpu_core_mask(cpu));
-	}
-	traverse_core_siblings(cpu, true);
+	/*
+	 * Check for any shared caches. Note that this must be done on a
+	 * per-core basis because one core in the pair might be disabled.
+	 */
+	if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+		shared_caches = true;

	set_numa_node(numa_cpu_lookup_table[cpu]);
	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
@@ -1003,6 +1036,35 @@ static struct sched_domain_topology_level powerpc_topology[] = {
	{ NULL, },
};

+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because it returns a
+ * non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+	return cpu_l2_cache_mask(cpu);
+}
+
+static struct sched_domain_topology_level power9_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
void __init smp_cpus_done(unsigned int max_cpus)
{
	/*
@@ -1016,14 +1078,23 @@ void __init smp_cpus_done(unsigned int max_cpus)

	dump_numa_cpu_topology();

-	set_sched_topology(powerpc_topology);
+	/*
+	 * If any CPU detects that it's sharing a cache with another CPU then
+	 * use the deeper topology that is aware of this sharing.
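The mask rework above threads an accessor function (cpu_sibling_mask, cpu_l2_cache_mask, cpu_core_mask) through a single helper so every relationship is recorded symmetrically in both CPUs' masks. The shape of that pattern, reduced to a self-contained sketch with a boolean matrix standing in for the cpumasks:

	#include <stdbool.h>

	#define NR_CPUS 8
	static bool related[NR_CPUS][NR_CPUS];	/* stand-in for the cpumasks */

	typedef bool *(*mask_fn)(int cpu);

	static bool *row(int cpu) { return related[cpu]; }

	/* Mark i and j as related in both directions, for whichever mask
	 * the caller passes in (sibling, L2, core...). */
	static void set_cpus_related(int i, int j, mask_fn get_mask)
	{
		get_mask(j)[i] = true;
		get_mask(i)[j] = true;
	}

	int main(void)
	{
		set_cpus_related(0, 1, row);	/* e.g. CPUs 0 and 1 share an L2 */
		return related[0][1] && related[1][0] ? 0 : 1;
	}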
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	/*
@@ -1016,14 +1078,23 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 	dump_numa_cpu_topology();
 
-	set_sched_topology(powerpc_topology);
+	/*
+	 * If any CPU detects that it's sharing a cache with another CPU then
+	 * use the deeper topology that is aware of this sharing.
+	 */
+	if (shared_caches) {
+		pr_info("Using shared cache scheduler topology\n");
+		set_sched_topology(power9_topology);
+	} else {
+		pr_info("Using standard scheduler topology\n");
+		set_sched_topology(powerpc_topology);
+	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 int __cpu_disable(void)
 {
 	int cpu = smp_processor_id();
-	int base, i;
 	int err;
 
 	if (!smp_ops->cpu_disable)
@@ -1034,14 +1105,7 @@ int __cpu_disable(void)
 		return err;
 
 	/* Update sibling maps */
-	base = cpu_first_thread_sibling(cpu);
-	for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
-		cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
-		cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
-		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
-		cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
-	}
-	traverse_core_siblings(cpu, false);
+	remove_cpu_from_masks(cpu);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 988f38dced0f..82d8aae81c6a 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -179,7 +179,7 @@ nothing_to_copy:
 	sld	r3, r3, r0
 	li	r0, 0
 1:
-	dcbf	r0,r3
+	dcbf	0,r3
 	addi	r3,r3,0x20
 	bdnz	1b
 
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 4d6b1d3a747f..7ccb7f81f8db 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -17,13 +17,13 @@
 #include <asm/ppc_asm.h>
 
 #ifdef CONFIG_PPC64
-#define SYSCALL(func)		.llong	DOTSYM(sys_##func),DOTSYM(sys_##func)
-#define COMPAT_SYS(func)	.llong	DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
-#define PPC_SYS(func)		.llong	DOTSYM(ppc_##func),DOTSYM(ppc_##func)
-#define OLDSYS(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
-#define SYS32ONLY(func)		.llong	DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
-#define PPC64ONLY(func)		.llong	DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
-#define SYSX(f, f3264, f32)	.llong	DOTSYM(f),DOTSYM(f3264)
+#define SYSCALL(func)		.8byte	DOTSYM(sys_##func),DOTSYM(sys_##func)
+#define COMPAT_SYS(func)	.8byte	DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
+#define PPC_SYS(func)		.8byte	DOTSYM(ppc_##func),DOTSYM(ppc_##func)
+#define OLDSYS(func)		.8byte	DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
+#define SYS32ONLY(func)		.8byte	DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
+#define PPC64ONLY(func)		.8byte	DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
+#define SYSX(f, f3264, f32)	.8byte	DOTSYM(f),DOTSYM(f3264)
 #else
 #define SYSCALL(func)		.long	sys_##func
 #define COMPAT_SYS(func)	.long	sys_##func
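Behaviourally the systbl change is a no-op: .llong is a PowerPC-only assembler directive while .8byte is the generic GAS spelling, and both emit a 64-bit value. As an illustration (assuming the usual DOTSYM() expansion, a dot-prefixed symbol on ELFv1 and the plain symbol on ELFv2), a hypothetical entry expands as:

/* Illustration only; not part of the patch. */
SYSCALL(read)
	/* before:  .llong .sys_read,.sys_read   (ELFv1) */
	/* after:   .8byte .sys_read,.sys_read   (identical bytes emitted) */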
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bfcfd9ef09f2..ec74e203ee04 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -114,6 +114,28 @@ static void pmac_backlight_unblank(void)
 static inline void pmac_backlight_unblank(void) { }
 #endif
 
+/*
+ * If oops/die is expected to crash the machine, return true here.
+ *
+ * This should not be expected to be 100% accurate; there may be
+ * notifiers registered or other unexpected conditions that may bring
+ * down the kernel. Or if the current process in the kernel is holding
+ * locks or has other critical state, the kernel may become effectively
+ * unusable anyway.
+ */
+bool die_will_crash(void)
+{
+	if (should_fadump_crash())
+		return true;
+	if (kexec_should_crash(current))
+		return true;
+	if (in_interrupt() || panic_on_oops ||
+			!current->pid || is_global_init(current))
+		return true;
+
+	return false;
+}
+
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
@@ -162,21 +184,9 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
 		crash_fadump(regs, "die oops");
 
-	/*
-	 * A system reset (0x100) is a request to dump, so we always send
-	 * it through the crashdump code.
-	 */
-	if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) {
+	if (kexec_should_crash(current))
 		crash_kexec(regs);
 
-		/*
-		 * We aren't the primary crash CPU. We need to send it
-		 * to a holding pattern to avoid it ending up in the panic
-		 * code.
-		 */
-		crash_kexec_secondary(regs);
-	}
-
 	if (!signr)
 		return;
 
@@ -202,18 +212,25 @@ NOKPROBE_SYMBOL(oops_end);
 static int __die(const char *str, struct pt_regs *regs, long err)
 {
 	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
-	printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
-	printk("SMP NR_CPUS=%d ", NR_CPUS);
-#endif
+
+	if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+		printk("LE ");
+	else
+		printk("BE ");
+
+	if (IS_ENABLED(CONFIG_PREEMPT))
+		pr_cont("PREEMPT ");
+
+	if (IS_ENABLED(CONFIG_SMP))
+		pr_cont("SMP NR_CPUS=%d ", NR_CPUS);
+
 	if (debug_pagealloc_enabled())
-		printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_NUMA
-	printk("NUMA ");
-#endif
-	printk("%s\n", ppc_md.name ? ppc_md.name : "");
+		pr_cont("DEBUG_PAGEALLOC ");
+
+	if (IS_ENABLED(CONFIG_NUMA))
+		pr_cont("NUMA ");
+
+	pr_cont("%s\n", ppc_md.name ? ppc_md.name : "");
 
 	if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
 		return 1;
@@ -288,23 +305,52 @@ void system_reset_exception(struct pt_regs *regs)
 	if (!nested)
 		nmi_enter();
 
+	__this_cpu_inc(irq_stat.sreset_irqs);
+
 	/* See if any machine dependent calls */
 	if (ppc_md.system_reset_exception) {
 		if (ppc_md.system_reset_exception(regs))
 			goto out;
 	}
 
-	die("System Reset", regs, SIGABRT);
+	if (debugger(regs))
+		goto out;
+
+	/*
+	 * A system reset is a request to dump, so we always send
+	 * it through the crashdump code (if fadump or kdump are
+	 * registered).
+	 */
+	crash_fadump(regs, "System Reset");
+
+	crash_kexec(regs);
+
+	/*
+	 * We aren't the primary crash CPU. We need to send it
+	 * to a holding pattern to avoid it ending up in the panic
+	 * code.
+	 */
+	crash_kexec_secondary(regs);
+
+	/*
+	 * No debugger or crash dump registered, print logs then
+	 * panic.
+	 */
+	__die("System Reset", regs, SIGABRT);
+
+	mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	nmi_panic(regs, "System Reset");
 
 out:
 #ifdef CONFIG_PPC_BOOK3S_64
 	BUG_ON(get_paca()->in_nmi == 0);
 	if (get_paca()->in_nmi > 1)
-		panic("Unrecoverable nested System Reset");
+		nmi_panic(regs, "Unrecoverable nested System Reset");
 #endif
 	/* Must die if the interrupt is not recoverable */
 	if (!(regs->msr & MSR_RI))
-		panic("Unrecoverable System Reset");
+		nmi_panic(regs, "Unrecoverable System Reset");
 
 	if (!nested)
 		nmi_exit();
@@ -312,39 +358,6 @@ out:
 	/* What should we do here? We could issue a shutdown or hard reset. */
 }
 
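Note the switch from panic() to nmi_panic() throughout: a system reset can arrive on a CPU while another CPU is already panicking, and re-entering panic() from NMI context can deadlock. For reference, nmi_panic() in include/linux/kernel.h of this era looks roughly like the following (quoted from memory; check the header for the authoritative version):

/*
 * Approximate shape of nmi_panic(); it initiates the panic only if no
 * other CPU already owns it, and otherwise parks the current CPU.
 */
#define nmi_panic(regs, fmt, ...)					\
do {									\
	int old_cpu, this_cpu;						\
									\
	this_cpu = raw_smp_processor_id();				\
	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); \
									\
	if (old_cpu == PANIC_CPU_INVALID)				\
		panic(fmt, ##__VA_ARGS__);	/* we own the panic */	\
	else if (old_cpu != this_cpu)					\
		nmi_panic_self_stop(regs);	/* another CPU does */	\
} while (0)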
-#ifdef CONFIG_PPC64
-/*
- * This function is called in real mode. Strictly no printk's please.
- *
- * regs->nip and regs->msr contains srr0 and ssr1.
- */
-long machine_check_early(struct pt_regs *regs)
-{
-	long handled = 0;
-
-	__this_cpu_inc(irq_stat.mce_exceptions);
-
-	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
-		handled = cur_cpu_spec->machine_check_early(regs);
-	return handled;
-}
-
-long hmi_exception_realmode(struct pt_regs *regs)
-{
-	__this_cpu_inc(irq_stat.hmi_exceptions);
-
-	wait_for_subcore_guest_exit();
-
-	if (ppc_md.hmi_exception_early)
-		ppc_md.hmi_exception_early(regs);
-
-	wait_for_tb_resync();
-
-	return 0;
-}
-
-#endif
-
 /*
  * I/O accesses can cause machine checks on powermacs.
  * Check if the NIP corresponds to the address of a sync
@@ -397,11 +410,6 @@ static inline int check_io_access(struct pt_regs *regs)
 /* On 4xx, the reason for the machine check or program exception
    is in the ESR. */
 #define get_reason(regs)	((regs)->dsisr)
-#ifndef CONFIG_FSL_BOOKE
-#define get_mc_reason(regs)	((regs)->dsisr)
-#else
-#define get_mc_reason(regs)	(mfspr(SPRN_MCSR))
-#endif
 #define REASON_FP		ESR_FP
 #define REASON_ILLEGAL		(ESR_PIL | ESR_PUO)
 #define REASON_PRIVILEGED	ESR_PPR
@@ -415,108 +423,17 @@ static inline int check_io_access(struct pt_regs *regs)
 /* On non-4xx, the reason for the machine check or program
    exception is in the MSR. */
 #define get_reason(regs)	((regs)->msr)
-#define get_mc_reason(regs)	((regs)->msr)
-#define REASON_TM		0x200000
-#define REASON_FP		0x100000
-#define REASON_ILLEGAL		0x80000
-#define REASON_PRIVILEGED	0x40000
-#define REASON_TRAP		0x20000
+#define REASON_TM		SRR1_PROGTM
+#define REASON_FP		SRR1_PROGFPE
+#define REASON_ILLEGAL		SRR1_PROGILL
+#define REASON_PRIVILEGED	SRR1_PROGPRIV
+#define REASON_TRAP		SRR1_PROGTRAP
 
 #define single_stepping(regs)	((regs)->msr & MSR_SE)
 #define clear_single_step(regs)	((regs)->msr &= ~MSR_SE)
 #endif
 
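The REASON_* rename is intended to be purely cosmetic. Assuming the usual asm/reg.h definitions, the SRR1_PROG* names carry exactly the values the removed literals had:

/* From asm/reg.h (values quoted for comparison with the old literals): */
#define SRR1_PROGTM	0x00200000 /* TM Bad Thing */
#define SRR1_PROGFPE	0x00100000 /* Floating Point Enabled */
#define SRR1_PROGILL	0x00080000 /* Illegal instruction */
#define SRR1_PROGPRIV	0x00040000 /* Privileged instruction */
#define SRR1_PROGTRAP	0x00020000 /* Trap */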
-#if defined(CONFIG_4xx)
-int machine_check_4xx(struct pt_regs *regs)
-{
-	unsigned long reason = get_mc_reason(regs);
-
-	if (reason & ESR_IMCP) {
-		printk("Instruction");
-		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
-	} else
-		printk("Data");
-	printk(" machine check in kernel mode.\n");
-
-	return 0;
-}
-
-int machine_check_440A(struct pt_regs *regs)
-{
-	unsigned long reason = get_mc_reason(regs);
-
-	printk("Machine check in kernel mode.\n");
-	if (reason & ESR_IMCP){
-		printk("Instruction Synchronous Machine Check exception\n");
-		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
-	}
-	else {
-		u32 mcsr = mfspr(SPRN_MCSR);
-		if (mcsr & MCSR_IB)
-			printk("Instruction Read PLB Error\n");
-		if (mcsr & MCSR_DRB)
-			printk("Data Read PLB Error\n");
-		if (mcsr & MCSR_DWB)
-			printk("Data Write PLB Error\n");
-		if (mcsr & MCSR_TLBP)
-			printk("TLB Parity Error\n");
-		if (mcsr & MCSR_ICP){
-			flush_instruction_cache();
-			printk("I-Cache Parity Error\n");
-		}
-		if (mcsr & MCSR_DCSP)
-			printk("D-Cache Search Parity Error\n");
-		if (mcsr & MCSR_DCFP)
-			printk("D-Cache Flush Parity Error\n");
-		if (mcsr & MCSR_IMPE)
-			printk("Machine Check exception is imprecise\n");
-
-		/* Clear MCSR */
-		mtspr(SPRN_MCSR, mcsr);
-	}
-	return 0;
-}
-
-int machine_check_47x(struct pt_regs *regs)
-{
-	unsigned long reason = get_mc_reason(regs);
-	u32 mcsr;
-
-	printk(KERN_ERR "Machine check in kernel mode.\n");
-	if (reason & ESR_IMCP) {
-		printk(KERN_ERR
-		       "Instruction Synchronous Machine Check exception\n");
-		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
-		return 0;
-	}
-	mcsr = mfspr(SPRN_MCSR);
-	if (mcsr & MCSR_IB)
-		printk(KERN_ERR "Instruction Read PLB Error\n");
-	if (mcsr & MCSR_DRB)
-		printk(KERN_ERR "Data Read PLB Error\n");
-	if (mcsr & MCSR_DWB)
-		printk(KERN_ERR "Data Write PLB Error\n");
-	if (mcsr & MCSR_TLBP)
-		printk(KERN_ERR "TLB Parity Error\n");
-	if (mcsr & MCSR_ICP) {
-		flush_instruction_cache();
-		printk(KERN_ERR "I-Cache Parity Error\n");
-	}
-	if (mcsr & MCSR_DCSP)
-		printk(KERN_ERR "D-Cache Search Parity Error\n");
-	if (mcsr & PPC47x_MCSR_GPR)
-		printk(KERN_ERR "GPR Parity Error\n");
-	if (mcsr & PPC47x_MCSR_FPR)
-		printk(KERN_ERR "FPR Parity Error\n");
-	if (mcsr & PPC47x_MCSR_IPR)
-		printk(KERN_ERR "Machine Check exception is imprecise\n");
-
-	/* Clear MCSR */
-	mtspr(SPRN_MCSR, mcsr);
-
-	return 0;
-}
-#elif defined(CONFIG_E500)
+#if defined(CONFIG_E500)
 int machine_check_e500mc(struct pt_regs *regs)
 {
 	unsigned long mcsr = mfspr(SPRN_MCSR);
@@ -618,7 +535,7 @@ silent_out:
 int machine_check_e500(struct pt_regs *regs)
 {
-	unsigned long reason = get_mc_reason(regs);
+	unsigned long reason = mfspr(SPRN_MCSR);
 
 	if (reason & MCSR_BUS_RBERR) {
 		if (fsl_rio_mcheck_exception(regs))
@@ -665,7 +582,7 @@ int machine_check_generic(struct pt_regs *regs)
 #elif defined(CONFIG_E200)
 int machine_check_e200(struct pt_regs *regs)
 {
-	unsigned long reason = get_mc_reason(regs);
+	unsigned long reason = mfspr(SPRN_MCSR);
 
 	printk("Machine check in kernel mode.\n");
 	printk("Caused by (from MCSR=%lx): ", reason);
@@ -687,35 +604,10 @@ int machine_check_e200(struct pt_regs *regs)
 
 	return 0;
 }
-#elif defined(CONFIG_PPC_8xx)
-int machine_check_8xx(struct pt_regs *regs)
-{
-	unsigned long reason = get_mc_reason(regs);
-
-	pr_err("Machine check in kernel mode.\n");
-	pr_err("Caused by (from SRR1=%lx): ", reason);
-	if (reason & 0x40000000)
-		pr_err("Fetch error at address %lx\n", regs->nip);
-	else
-		pr_err("Data access error at address %lx\n", regs->dar);
-
-#ifdef CONFIG_PCI
-	/* the qspan pci read routines can cause machine checks -- Cort
-	 *
-	 * yuck !!! that totally needs to go away ! There are better ways
-	 * to deal with that than having a wart in the mcheck handler.
-	 * -- BenH
-	 */
-	bad_page_fault(regs, regs->dar, SIGBUS);
-	return 1;
-#else
-	return 0;
-#endif
-}
-#else
+#elif defined(CONFIG_PPC32)
 int machine_check_generic(struct pt_regs *regs)
 {
-	unsigned long reason = get_mc_reason(regs);
+	unsigned long reason = regs->msr;
 
 	printk("Machine check in kernel mode.\n");
 	printk("Caused by (from SRR1=%lx): ", reason);
@@ -752,10 +644,14 @@ int machine_check_generic(struct pt_regs *regs)
 
 void machine_check_exception(struct pt_regs *regs)
 {
-	enum ctx_state prev_state = exception_enter();
 	int recover = 0;
+	bool nested = in_nmi();
+
+	if (!nested)
+		nmi_enter();
 
-	__this_cpu_inc(irq_stat.mce_exceptions);
+	/* 64s accounts the mce in machine_check_early when in HVMODE */
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
+		__this_cpu_inc(irq_stat.mce_exceptions);
 
 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
@@ -783,10 +679,11 @@ void machine_check_exception(struct pt_regs *regs)
 
 	/* Must die if the interrupt is not recoverable */
 	if (!(regs->msr & MSR_RI))
-		panic("Unrecoverable Machine check");
+		nmi_panic(regs, "Unrecoverable Machine check");
 
 bail:
-	exception_exit(prev_state);
+	if (!nested)
+		nmi_exit();
 }
 
 void SMIException(struct pt_regs *regs)
@@ -1672,24 +1569,6 @@ void performance_monitor_exception(struct pt_regs *regs)
 
 	perf_irq(regs);
 }
 
-#ifdef CONFIG_8xx
-void SoftwareEmulation(struct pt_regs *regs)
-{
-	CHECK_FULL_REGS(regs);
-
-	if (!user_mode(regs)) {
-		debugger(regs);
-		die("Kernel Mode Unimplemented Instruction or SW FPU Emulation",
-		    regs, SIGFPE);
-	}
-
-	if (!emulate_math(regs))
-		return;
-
-	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-}
-#endif /* CONFIG_8xx */
-
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
 {
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 003b20964ea0..5d105b8eeece 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -205,3 +205,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
 
 	return orig_ret_vaddr;
 }
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+			     struct pt_regs *regs)
+{
+	if (ctx == RP_CHECK_CHAIN_CALL)
+		return regs->gpr[1] <= ret->stack;
+	else
+		return regs->gpr[1] < ret->stack;
+}
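For the uprobes hunk: on powerpc, gpr[1] is the stack pointer and stacks grow down, so a pending return instance recorded at ret->stack is dead once the task unwinds above it (e.g. via longjmp). The <= at RP_CHECK_CHAIN_CALL allows for the fact that at function entry the new frame has not yet been allocated. A commented restatement of the rule, as an illustration rather than the committed code:

/*
 * Illustration of the liveness rule used above (not the committed code).
 * sp is the task's current stack pointer (gpr[1]); rec_stack is the SP
 * recorded when the return probe was planted.
 */
static bool uretprobe_instance_alive(unsigned long sp, unsigned long rec_stack,
				     bool at_chain_call)
{
	if (at_chain_call)
		/*
		 * At RP_CHECK_CHAIN_CALL we are at function entry and the
		 * new frame has not been allocated, so SP may still equal
		 * the recorded value of a live instance.
		 */
		return sp <= rec_stack;

	/* Otherwise a live instance must have SP strictly below the record. */
	return sp < rec_stack;
}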
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 6b2b69616e77..769c2624e0a6 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -232,15 +232,9 @@ __do_get_tspec:
 	lwz	r6,(CFG_TB_ORIG_STAMP+4)(r9)
 
 	/* Get a stable TB value */
-#ifdef CONFIG_8xx
-2:	mftbu	r3
-	mftbl	r4
-	mftbu	r0
-#else
-2:	mfspr	r3, SPRN_TBRU
-	mfspr	r4, SPRN_TBRL
-	mfspr	r0, SPRN_TBRU
-#endif
+2:	MFTBU(r3)
+	MFTBL(r4)
+	MFTBU(r0)
 	cmplw	cr0,r3,r0
 	bne-	2b
 
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b1a250560198..882628fa6987 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -8,7 +8,7 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 
-#ifdef CONFIG_STRICT_KERNEL_RWX
+#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32)
 #define STRICT_ALIGN_SIZE	(1 << 24)
 #else
 #define STRICT_ALIGN_SIZE	PAGE_SIZE
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 34721a257a77..2f6eadd9408d 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -216,6 +216,9 @@ void soft_nmi_interrupt(struct pt_regs *regs)
 		return;
 
 	nmi_enter();
+
+	__this_cpu_inc(irq_stat.soft_nmi_irqs);
+
 	tb = get_tb();
 	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
 		per_cpu(wd_timer_tb, cpu) = tb;
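The watchdog now counts its hits in irq_stat.soft_nmi_irqs, matching the new sreset_irqs counter incremented in traps.c above. Presumably both fields are added to the per-CPU irq_cpustat_t in arch/powerpc/include/asm/hardirq.h; a simplified sketch (field order and any config guards are assumptions):

/* Simplified sketch of irq_cpustat_t; see asm/hardirq.h for the real one. */
typedef struct {
	unsigned int __softirq_pending;
	unsigned int timer_irqs_event;
	unsigned int timer_irqs_others;
	unsigned int pmu_irqs;
	unsigned int mce_exceptions;
	unsigned int spurious_irqs;
	unsigned int sreset_irqs;	/* incremented in system_reset_exception() */
	unsigned int soft_nmi_irqs;	/* incremented in soft_nmi_interrupt() */
} ____cacheline_aligned irq_cpustat_t;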