From 7e91c7df29b5e196de3dc6f086c8937973bd0b88 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 7 Feb 2017 19:58:02 +0200 Subject: swiotlb-xen: implement xen_swiotlb_dma_mmap callback This function creates userspace mapping for the DMA-coherent memory. Signed-off-by: Stefano Stabellini Signed-off-by: Oleksandr Dmytryshyn Signed-off-by: Andrii Anisov Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index bd62d94f8ac5..cd1684e4e864 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -198,6 +198,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, + .mmap = xen_swiotlb_dma_mmap, }; int __init xen_mm_init(void) -- cgit From 69369f52d28a34c84acb6f2a8a585e743441566a Mon Sep 17 00:00:00 2001 From: Andrii Anisov Date: Tue, 7 Feb 2017 19:58:03 +0200 Subject: swiotlb-xen: implement xen_swiotlb_get_sgtable callback Signed-off-by: Andrii Anisov Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index cd1684e4e864..76ea48a614e1 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -199,6 +199,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, .mmap = xen_swiotlb_dma_mmap, + .get_sgtable = xen_swiotlb_get_sgtable, }; int __init xen_mm_init(void) -- cgit From c74fd80f2f41d05f350bb478151021f88551afe8 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 13 Jan 2017 15:07:51 -0500 Subject: xen: do not re-use pirq number cached in pci device msi msg data Revert the main part of commit: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") That commit introduced reading the pci device's msi message data to see if a pirq was previously configured for the device's msi/msix, and re-use that pirq. At the time, that was the correct behavior. However, a later change to Qemu caused it to call into the Xen hypervisor to unmap all pirqs for a pci device, when the pci device disables its MSI/MSIX vectors; specifically the Qemu commit: c976437c7dba9c7444fb41df45468968aaa326ad ("qemu-xen: free all the pirqs for msi/msix when driver unload") Once Qemu added this pirq unmapping, it was no longer correct for the kernel to re-use the pirq number cached in the pci device msi message data. All Qemu releases since 2.1.0 contain the patch that unmaps the pirqs when the pci device disables its MSI/MSIX vectors. This bug is causing failures to initialize multiple NVMe controllers under Xen, because the NVMe driver sets up a single MSIX vector for each controller (concurrently), and then after using that to talk to the controller for some configuration data, it disables the single MSIX vector and re-configures all the MSIX vectors it needs. So the MSIX setup code tries to re-use the cached pirq from the first vector for each controller, but the hypervisor has already given away that pirq to another controller, and its initialization fails. This is discussed in more detail at: https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") Signed-off-by: Dan Streetman Reviewed-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Boris Ostrovsky --- arch/x86/pci/xen.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e1fb269c87af..292ab0364a89 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? -- cgit From 75013fb16f8484898eaa8d0b08fed942d790f029 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 1 Mar 2017 01:23:24 +0900 Subject: kprobes/x86: Fix kernel panic when certain exception-handling addresses are probed Fix to the exception table entry check by using probed address instead of the address of copied instruction. This bug may cause unexpected kernel panic if user probe an address where an exception can happen which should be fixup by __ex_table (e.g. copy_from_user.) Unless user puts a kprobe on such address, this doesn't cause any problem. This bug has been introduced years ago, by commit: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently"). Signed-off-by: Masami Hiramatsu Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently") Link: http://lkml.kernel.org/r/148829899399.28855.12581062400757221722.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 2 +- arch/x86/kernel/kprobes/core.c | 6 +++--- arch/x86/kernel/kprobes/opt.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index c6ee63f927ab..d688826e5736 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -67,7 +67,7 @@ #endif /* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); +extern int can_boost(kprobe_opcode_t *instruction, void *addr); /* Recover instruction if given address is probed */ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 520b8dfe1640..88b3c942473d 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -166,12 +166,12 @@ NOKPROBE_SYMBOL(skip_prefixes); * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes, void *addr) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ retry: @@ -416,7 +416,7 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. */ - if (can_boost(p->ainsn.insn)) + if (can_boost(p->ainsn.insn, p->addr)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3d1bee9d6a72..3e7c6e5a08ff 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) while (len < RELATIVEJUMP_SIZE) { ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) + if (!ret || !can_boost(dest + len, src + len)) return -EINVAL; len += ret; } -- cgit From 10bce8410607a18eb3adf5d2739db8c8593e110d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 27 Feb 2017 23:50:58 +0100 Subject: x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/ ... since this is all x86-specific data and it makes sense to have it under x86/ logically instead in the toplevel debugfs dir. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170227225058.27289-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index bdb83e431d89..38b64587b31b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void) struct dentry *dbp, *version, *data; int error = -ENOMEM; - dbp = debugfs_create_dir("boot_params", NULL); + dbp = debugfs_create_dir("boot_params", arch_debugfs_dir); if (!dbp) return -ENOMEM; -- cgit From e7c95effcd3a7a0c9535c809141ca499fede2c31 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Feb 2017 07:05:59 +0100 Subject: s390/crypt: fix missing unlock in ctr_paes_crypt on error path The ctr mode of protected key aes uses the ctrblk page if the ctrblk_lock could be acquired. If the protected key has to be reestablished and this operation fails the unlock for the ctrblk_lock is missing. Add it. Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/paes_s390.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index d69ea495c4d7..716b17238599 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (k < n) { - if (__ctr_paes_set_key(ctx) != 0) + if (__ctr_paes_set_key(ctx) != 0) { + if (locked) + spin_unlock(&ctrblk_lock); return blkcipher_walk_done(desc, walk, -EIO); + } } } if (locked) -- cgit From d9fcf2a1cbd1ff65d0109b1b400938808007fcd5 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Feb 2017 07:42:01 +0100 Subject: s390: fix in-kernel program checks A program check inside the kernel takes a slightly different path in entry.S compare to a normal user fault. A recent change moved the store of the breaking event address into the path taken for in-kernel program checks as well, but %r14 has not been setup to point to the correct location. A wild store is the consequence. Move the store of the breaking event address to the code path for user space faults. Fixes: 34525e1f7e8d ("s390: store breaking event address only for program checks") Reported-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dff2152350a7..6a7d737d514c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -490,7 +490,7 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f + j 4f 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 @@ -499,8 +499,8 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 3f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: la %r11,STACK_FRAME_OVERHEAD(%r15) - stg %r10,__THREAD_last_break(%r14) +3: stg %r10,__THREAD_last_break(%r14) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) @@ -509,14 +509,14 @@ ENTRY(pgm_check_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 4f + jz 5f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -4: REENABLE_IRQS +5: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) -- cgit From e69ca822ce0ed3ba006ce384d7d205c81d92373f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:16:03 +0100 Subject: s390/cputime: remove last traces of cputime_t The cputime_t type is a thing of the past, replace the last occurences of the type in the s390 code with a simple u64. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 14 ++------------ arch/s390/kernel/vtime.c | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d1c407ddf703..e5672d6276a6 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,32 +8,22 @@ #define _S390_CPUTIME_H #include -#include #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long __nocast cputime_t; -typedef unsigned long long __nocast cputime64_t; - #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) -static inline unsigned long __div(unsigned long long n, unsigned long base) -{ - return n / base; -} - /* * Convert cputime to microseconds and back. */ -static inline unsigned int cputime_to_usecs(const cputime_t cputime) +static inline u64 cputime_to_usecs(const u64 cputime) { - return (__force unsigned long long) cputime >> 12; + return cputime >> 12; } - u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 31bd96e81167..8f5f59a151b4 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime) } static void account_system_index_scaled(struct task_struct *p, - cputime_t cputime, cputime_t scaled, + u64 cputime, u64 scaled, enum cpu_usage_stat index) { p->stimescaled += cputime_to_nsecs(scaled); -- cgit From 3c915bdc1775acfa214195da1ffb39dabdd1a389 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:18:34 +0100 Subject: s390/cputime: reset all accounting fields on fork copy_thread has to reset all cputime related field in the task struct, not only user_timer and system_timer. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 54281660582c..249deafaa6ee 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -121,7 +121,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, clear_tsk_thread_flag(p, TIF_SINGLE_STEP); /* Initialize per thread user and system timer values */ p->thread.user_timer = 0; + p->thread.guest_timer = 0; p->thread.system_timer = 0; + p->thread.hardirq_timer = 0; + p->thread.softirq_timer = 0; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ -- cgit From e53051e757d6cd66741955b93581e54415e48a70 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:21:10 +0100 Subject: s390/cputime: provide archicture specific cputime_to_nsecs The generic cputime_to_nsecs function first converts the cputime to micro-seconds and then multiplies the result with 1000. This looses some bits of accuracy, provide our own version of cputime_to_nsecs that does not loose precision. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index e5672d6276a6..9072bf63a846 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,6 +8,7 @@ #define _S390_CPUTIME_H #include +#include #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) @@ -17,13 +18,18 @@ #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) /* - * Convert cputime to microseconds and back. + * Convert cputime to microseconds. */ static inline u64 cputime_to_usecs(const u64 cputime) { return cputime >> 12; } +/* + * Convert cputime to nanoseconds. + */ +#define cputime_to_nsecs(cputime) tod_to_ns(cputime) + u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) -- cgit From d03bd0454b101adb94d0b5a9cc11396182943cb4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:47:57 +0100 Subject: s390/timex: micro optimization for tod_to_ns The conversion of a TOD value to nano-seconds currently uses a 32/32 bit split with the calculation for "nsecs = (TOD * 125) >> 9". Using a 55/9 bit split saves an instruction. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 354344dcc198..118535123f34 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void) * ns = (todval * 125) >> 9; * * In order to avoid an overflow with the multiplication we can rewrite this. - * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits) * we end up with * - * ns = ((2^32 * th + tl) * 125 ) >> 9; - * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * ns = ((2^9 * th + tl) * 125 ) >> 9; + * -> ns = (th * 125) + ((tl * 125) >> 9); * */ static inline unsigned long long tod_to_ns(unsigned long long todval) { - unsigned long long ns; - - ns = ((todval >> 32) << 23) * 125; - ns += ((todval & 0xffffffff) * 125) >> 9; - return ns; + return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } #endif -- cgit From bb3f0a52630c84807fca9bdd76ac2f5dcec82689 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 28 Feb 2017 13:50:52 +0800 Subject: x86/apic: Fix a warning message in logical CPU IDs allocation The current warning message in allocate_logical_cpuid() is somewhat confusing: Only 1 processors supported.Processor 2/0x2 and the rest are ignored. As it might imply that there's only one CPU in the system - while what we ran into here is a kernel limitation. Fix the warning message to clarify all that: APIC: NR_CPUS/possible_cpus limit of 2 reached. Processor 2/0x2 and the rest are ignored. ( Also update the error return from -1 to -EINVAL, which is the more canonical return value. ) Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1488261052-25753-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4261b3282ad9..11088b86e5c7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2062,10 +2062,10 @@ static int allocate_logical_cpuid(int apicid) /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "Only %d processors supported." + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. " "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids - 1, nr_logical_cpuids, apicid); - return -1; + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; } cpuid_to_apicid[nr_logical_cpuids] = apicid; -- cgit From 11277aabcbbe13916151af897d29a5e9f71ca73f Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 23 Feb 2017 17:16:41 +0800 Subject: x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR() The following commit: 2e63ad4bd5dd ("x86/apic: Do not init irq remapping if ioapic is disabled") ... added a check for skipped IO-APIC setup to enable_IR_x2apic(), but this check is also duplicated in try_to_enable_IR() - and it will never succeed in calling irq_remapping_enable(). Remove the whole irq_remapping_enable() complication: if the IO-APIC is disabled we cannot enable IRQ remapping. Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1487841401-1543-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11088b86e5c7..aee7deddabd0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } #endif /* !CONFIG_X86_X2APIC */ -static int __init try_to_enable_IR(void) -{ -#ifdef CONFIG_X86_IO_APIC - if (!x2apic_enabled() && skip_ioapic_setup) { - pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); - return -1; - } -#endif - return irq_remapping_enable(); -} - void __init enable_IR_x2apic(void) { unsigned long flags; int ret, ir_stat; - if (skip_ioapic_setup) + if (skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return; + } ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) @@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void) /* If irq_remapping_prepare() succeeded, try to enable it */ if (ir_stat >= 0) - ir_stat = try_to_enable_IR(); + ir_stat = irq_remapping_enable(); /* ir_stat contains the remap mode or an error code */ try_to_enable_x2apic(ir_stat); -- cgit From 6b0b7551428e4caae1e2c023a529465a9a9ae2d4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 16 Feb 2017 17:00:50 +1100 Subject: perf/core: Rename CONFIG_[UK]PROBE_EVENT to CONFIG_[UK]PROBE_EVENTS We have uses of CONFIG_UPROBE_EVENT and CONFIG_KPROBE_EVENT as well as CONFIG_UPROBE_EVENTS and CONFIG_KPROBE_EVENTS. Consistently use the plurals. Signed-off-by: Anton Blanchard Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: davem@davemloft.net Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/20170216060050.20866-1-anton@ozlabs.org Signed-off-by: Ingo Molnar --- arch/powerpc/configs/85xx/kmp204x_defconfig | 2 +- arch/s390/configs/default_defconfig | 2 +- arch/s390/configs/gcov_defconfig | 2 +- arch/s390/configs/performance_defconfig | 2 +- arch/s390/defconfig | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig index aaaaa609cd24..34a4da23f000 100644 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig @@ -210,7 +210,7 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y CONFIG_RCU_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 143b1e00b818..4b176fe83da4 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f05d2d6e1087..0de46cc397f6 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 2358bf33c5ef..e167557b434c 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 68bfd09f1b02..97189dbaf34b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y -- cgit From 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Fri, 17 Feb 2017 11:07:49 -0600 Subject: x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack register Writing to the software acknowledge clear register when there are no pending messages causes a HUB error to assert. The original intent of this write was to clear the pending bits before start of operation, but this is an incorrect method and has been determined to be unnecessary. Signed-off-by: Andrew Banman Acked-by: Mike Travis Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: rja@hpe.com Cc: sivanich@hpe.com Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 766d4d3529a1..f25982cdff90 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); -- cgit From 90b28ded88dda8bea82b4a86923e73ba0746d884 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 19 Feb 2017 19:32:48 +0100 Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e244c19a2451..01c9cda3e1b7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -223,6 +223,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TA */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ -- cgit From 73667e31a153a66da97feb1584726222504924f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:17:17 +0100 Subject: x86/hyperv: Hide unused label This new 32-bit warning just showed up: arch/x86/hyperv/hv_init.c: In function 'hyperv_init': arch/x86/hyperv/hv_init.c:167:1: error: label 'register_msr_cs' defined but not used [-Werror=unused-label] The easiest solution is to move the label up into the existing #ifdef that has the goto. Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource") Signed-off-by: Arnd Bergmann Acked-by: Stephen Hemminger Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: devel@linuxdriverproject.org Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Link: http://lkml.kernel.org/r/20170214211736.2641241-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index db64baf0e500..8bef70e7f3cc 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -158,13 +158,13 @@ void hyperv_init(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); return; } +register_msr_cs: #endif /* * For 32 bit guests just use the MSR based mechanism for reading * the partition counter. */ -register_msr_cs: hyperv_cs = &hyperv_cs_msr; if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); -- cgit From aa5ec3f715d576353c7d8f4f8085634bd845b73f Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 21:29:22 +0900 Subject: x86/vmware: Remove duplicate inclusion of asm/timer.h Signed-off-by: Masanari Iida Cc: akataria@vmware.com Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20170227122922.26230-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/vmware.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 891f4dad7b2c..22403a28caf5 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -30,7 +30,6 @@ #include #include #include -#include #undef pr_fmt #define pr_fmt(fmt) "vmware: " fmt -- cgit From e86a2d2d34e20289ae7d46692e16d43c3a785db9 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 22:07:03 +0900 Subject: x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h Signed-off-by: Masanari Iida Cc: fenghua.yu@intel.com Link: http://lkml.kernel.org/r/20170227130703.26968-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb9..759577d9d166 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include -- cgit From 153654dbe595a68845ba14d5b0bfe299fa6a7e99 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 28 Feb 2017 21:34:28 +0800 Subject: x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC The revert of 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") causes a problem for IOAPIC hotplug. The problem is that IRQs are allocated and freed in pci_enable_device() and pci_disable_device(). But there are some drivers which don't call pci_disable_device(), and they have good reasons not calling it, so if they're using IOAPIC their IRQs won't have a chance to be released from the IOAPIC. When this happens IOAPIC hot-removal fails with a kernel stack dump and an error message like this: [149335.697989] pin16 on IOAPIC2 is still in use. It turns out that we can fix it in a different way without moving IRQ allocation into pcibios_alloc_irq(), thus avoiding the regression of 991de2e59090. We can keep the allocation and freeing of IRQs as is within pci_enable_device()/pci_disable_device(), without breaking any previous assumption of the rest of the system, keeping compatibility with both the legacy and the modern drivers. We can accomplish this by implementing the existing __weak hook of pcibios_release_device() thus when a pci device is about to be deleted we get notified in the hook and take the chance to release its IRQ, if any, from the IOAPIC. Implement pcibios_release_device() for x86 to release any IRQ not released by the driver. Signed-off-by: Rui Wang Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: fengguang.wu@intel.com Cc: helgaas@kernel.org Cc: kbuild-all@01.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1488288869-31290-2-git-send-email-rui.y.wang@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/common.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0cb52ae0a8f0..190e718694b1 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pcibios_release_device(struct pci_dev *dev) +{ + if (atomic_dec_return(&dev->enable_cnt) >= 0) + pcibios_disable_device(dev); + +} +#endif + int pci_ext_cfg_avail(void) { if (raw_pci_ext_ops) -- cgit From 58ab9a088ddac4efe823471275859d64f735577e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Feb 2017 14:26:03 -0800 Subject: x86/pkeys: Check against max pkey to avoid overflows Kirill reported a warning from UBSAN about undefined behavior when using protection keys. He is running on hardware that actually has support for it, which is not widely available. The warning triggers because of very large shifts of integers when doing a pkey_free() of a large, invalid value. This happens because we never check that the pkey "fits" into the mm_pkey_allocation_map(). I do not believe there is any danger here of anything bad happening other than some aliasing issues where somebody could do: pkey_free(35); and the kernel would effectively execute: pkey_free(8); While this might be confusing to an app that was doing something stupid, it has to do something stupid and the effects are limited to the app shooting itself in the foot. Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Cc: kirill.shutemov@linux.intel.com Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pkeys.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899..b3b09b98896d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). + */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; -- cgit From 940b2f2fd963c043418ce8af64605783b2b19140 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Feb 2017 12:31:40 +0100 Subject: x86/events: Remove last remnants of old filenames Update to the new file paths, remove them from introductory comments. Signed-off-by: Borislav Petkov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170218113140.8051-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/events/amd/core.c | 2 +- arch/x86/events/intel/cstate.c | 2 +- arch/x86/events/intel/rapl.c | 2 +- arch/x86/events/intel/uncore.h | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index afb222b63cae..c84584bb9402 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* moved to perf_event_amd_uncore.c */ + /* moved to uncore.c */ return &emptyconstraint; default: return &emptyconstraint; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index aff4b5b69d40..238ae3248ba5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_cstate.c: support cstate residency counters + * Support cstate residency counters * * Copyright (C) 2015, Intel Corp. * Author: Kan Liang (kan.liang@intel.com) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22054ca49026..9d05c7e67f60 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Support Intel RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index ad986c1e29bc..df5989f27b1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; -/* perf_event_intel_uncore_snb.c */ +/* uncore_snb.c */ int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); @@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); -/* perf_event_intel_uncore_snbep.c */ +/* uncore_snbep.c */ int snbep_uncore_pci_init(void); void snbep_uncore_cpu_init(void); int ivbep_uncore_pci_init(void); @@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); -/* perf_event_intel_uncore_nhmex.c */ +/* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); -- cgit From 72042a8c7b01048a36ece216aaf206b7d60ca661 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:35 +1100 Subject: x86/purgatory: Make functions and variables static Sparse emits several 'symbol not declared' warnings for various functions and variables. Add static keyword to functions and variables which have file scope only. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-2-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 25e068ba3382..2a5f4373c797 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -18,11 +18,11 @@ struct sha_region { unsigned long len; }; -unsigned long backup_dest = 0; -unsigned long backup_src = 0; -unsigned long backup_sz = 0; +static unsigned long backup_dest; +static unsigned long backup_src; +static unsigned long backup_sz; -u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; struct sha_region sha_regions[16] = {}; @@ -39,7 +39,7 @@ static int copy_backup_region(void) return 0; } -int verify_sha256_digest(void) +static int verify_sha256_digest(void) { struct sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; -- cgit From e98fe5127b9cc8ab33d1094b81c19deb1f9082bf Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:36 +1100 Subject: x86/purgatory: Fix sparse warning, symbol not declared Sparse emits warning, 'symbol not declared' for a function that has neither file scope nor a forward declaration. The functions only call site is an ASM file. Add a header file with the function declaration. Include the header file in the C source file defining the function in order to fix the sparse warning. Include the header file in ASM file containing the call site to document the usage. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-3-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 1 + arch/x86/purgatory/purgatory.h | 8 ++++++++ arch/x86/purgatory/setup-x86_64.S | 1 + 3 files changed, 10 insertions(+) create mode 100644 arch/x86/purgatory/purgatory.h (limited to 'arch') diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 2a5f4373c797..b6d5c8946e66 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -11,6 +11,7 @@ */ #include "sha256.h" +#include "purgatory.h" #include "../boot/string.h" struct sha_region { diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h new file mode 100644 index 000000000000..e2e365a6c192 --- /dev/null +++ b/arch/x86/purgatory/purgatory.h @@ -0,0 +1,8 @@ +#ifndef PURGATORY_H +#define PURGATORY_H + +#ifndef __ASSEMBLY__ +extern void purgatory(void); +#endif /* __ASSEMBLY__ */ + +#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index fe3c91ba1bd0..f90e9dfa90bb 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,6 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ +#include "purgatory.h" .text .globl purgatory_start -- cgit From 8392f16d38bb5222c03073a3906b7fd272386faf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 21 Feb 2017 19:36:39 +0100 Subject: x86/boot: Correct setup_header.start_sys name It is called start_sys_seg elsewhere so rename it to that. It is an obsolete field so we could just as well directly call it __u16 __pad... No functional change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170221183639.16554-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/uapi/asm/bootparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 5138dacf8bb8..07244ea16765 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -58,7 +58,7 @@ struct setup_header { __u32 header; __u16 version; __u32 realmode_swtch; - __u16 start_sys; + __u16 start_sys_seg; __u16 kernel_version; __u8 type_of_loader; __u8 loadflags; -- cgit From f94c8d116997597fc00f0812b0ab9256e7b0c58f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 15:53:38 +0100 Subject: sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock() interface Wanpeng Li reported that since the following commit: acb04058de49 ("sched/clock: Fix hotplug crash") ... KVM always runs with unstable sched-clock even though KVM's kvm_clock _is_ stable. The problem is that we've tied clear_sched_clock_stable() to the TSC state, and overlooked that sched_clock() is a paravirt function. Solve this by doing two things: - tie the sched_clock() stable state more clearly to the TSC stable state for the normal (!paravirt) case. - only call clear_sched_clock_stable() when we mark TSC unstable when we use native_sched_clock(). The first means we can actually run with stable sched_clock in more situations then before, which is good. And since commit: 12907fbb1a69 ("sched/clock, clocksource: Add optional cs::mark_unstable() method") ... this should be reliable. Since any detection of TSC fail now results in marking the TSC unstable. Reported-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: acb04058de49 ("sched/clock: Fix hotplug crash") Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ---- arch/x86/kernel/cpu/centaur.c | 2 -- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/intel.c | 4 ---- arch/x86/kernel/cpu/transmeta.c | 2 -- arch/x86/kernel/tsc.c | 35 +++++++++++++++++++++++------------ 7 files changed, 23 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95f..30d924ae5c34 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -555,10 +555,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c4..bad8ff078a21 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -104,8 +104,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - - clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c64ca5929cb5..9d98e2e15d54 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -86,7 +86,6 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1075,8 +1074,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); - else - clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48..31e679238e8d 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -184,7 +184,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb553..2388bafe5c37 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -161,10 +161,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b999839..6a9ad73a2c54 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -15,8 +15,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - - clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992..911129fda2f9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -326,9 +326,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1111,8 +1118,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; + tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1134,18 +1143,20 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) { + clocksource_mark_unstable(&clocksource_tsc); + } else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } } -- cgit From bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2017 21:10:17 +0100 Subject: x86/hpet: Prevent might sleep splat on resume Sergey reported a might sleep warning triggered from the hpet resume path. It's caused by the call to disable_irq() from interrupt disabled context. The problem with the low level resume code is that it is not accounted as a special system_state like we do during the boot process. Calling the same code during system boot would not trigger the warning. That's inconsistent at best. In this particular case it's trivial to replace the disable_irq() with disable_hardirq() because this particular code path is solely used from system resume and the involved hpet interrupts can never be force threaded. Reported-and-tested-by: Sergey Senozhatsky Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Sergey Senozhatsky Cc: Borislav Petkov Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dc6ba5bda9fc..89ff7af2de50 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } -- cgit From 7afbeb6df2aa5f9e3a0fc228817a85c16dea0faa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Feb 2017 12:56:12 +0100 Subject: s390/ipl: always use load normal for CCW-type re-IPL commit 14890678687c ("s390/ipl: use load normal for LPAR re-ipl") missed to convert one code path to use load normal semantics for re-IPL. Convert the missing code path as well. Fixes: 14890678687c ("s390/ipl: use load normal for LPAR re-ipl") Reported-by: Michael Holzheu Acked-by: Michael Holzheu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b67dafb7b7cf..e545ffe5155a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,6 +564,8 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { + if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); -- cgit From 2e4d88009f57057df7672fa69a32b5224af54d37 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 2 Mar 2017 15:23:42 +0100 Subject: KVM: s390: Fix guest migration for huge guests resulting in panic While we can technically not run huge page guests right now, we can setup a guest with huge pages. Trying to migrate it will trigger a VM_BUG_ON and, if the kernel is not configured to panic on a BUG, it will happily try to work on non-existing page table entries. With this patch, we always return "dirty" if we encounter a large page when migrating. This at least fixes the immediate problem until we have proper handling for both kind of pages. Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.") Cc: # 3.16+ Signed-off-by: Janosch Frank Acked-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b48dc5f1900b..463e5ef02304 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pgste_t pgste; pte_t *ptep; pte_t pte; bool dirty; - ptep = get_locked_pte(mm, addr, &ptl); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return false; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return false; + /* We can't run guests backed by huge pages, but userspace can + * still set them up and then try to migrate them without any + * migration support. + */ + if (pmd_large(*pmd)) + return true; + + ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (unlikely(!ptep)) return false; -- cgit From e148bd17f48bd17fca2f4f089ec879fa6e47e34c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:42 +0530 Subject: powerpc: Emulation support for load/store instructions on LE emulate_step() uses a number of underlying kernel functions that were initially not enabled for LE. This has been rectified since. So, fix emulate_step() for LE for the corresponding instructions. Cc: stable@vger.kernel.org # v3.18+ Reported-by: Anton Blanchard Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 846dba2c6360..9c542ec70c5b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif -- cgit From 4ceae137bdab2232e57b2e6fd5631a22a0160938 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:43 +0530 Subject: powerpc: emulate_step() tests for load/store instructions Add new selftest that test emulate_step for Normal, Floating Point, Vector and Vector Scalar - load/store instructions. Test should run at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 is set. Sample log: emulate_step_test: ld : PASS emulate_step_test: lwz : PASS emulate_step_test: lwzx : PASS emulate_step_test: std : PASS emulate_step_test: ldarx / stdcx. : PASS emulate_step_test: lfsx : PASS emulate_step_test: stfsx : PASS emulate_step_test: lfdx : PASS emulate_step_test: stfdx : PASS emulate_step_test: lvx : PASS emulate_step_test: stvx : PASS emulate_step_test: lxvd2x : PASS emulate_step_test: stxvd2x : PASS Signed-off-by: Ravi Bangoria [mpe: Drop start/complete lines, make it all __init] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 7 + arch/powerpc/lib/Makefile | 1 + arch/powerpc/lib/test_emulate_step.c | 434 ++++++++++++++++++++++++++++++++++ 3 files changed, 442 insertions(+) create mode 100644 arch/powerpc/lib/test_emulate_step.c (limited to 'arch') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d99bd442aacb..e7d6d86563ee 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -284,6 +284,13 @@ #define PPC_INST_BRANCH_COND 0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa #define PPC_INST_STBCIX 0x7c0007aa +#define PPC_INST_LWZX 0x7c00002e +#define PPC_INST_LFSX 0x7c00042e +#define PPC_INST_STFSX 0x7c00052e +#define PPC_INST_LFDX 0x7c0004ae +#define PPC_INST_STFDX 0x7c0005ae +#define PPC_INST_LVX 0x7c0000ce +#define PPC_INST_STVX 0x7c0001ce /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0e649d72fe8d..2b5e09020cfe 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -20,6 +20,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 000000000000..2534c1447554 --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,434 @@ +/* + * Simple sanity test for emulate_step load/store instructions. + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include +#include +#include + +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) + +/* + * Defined with TEST_ prefix so it does not conflict with other + * definitions. + */ +#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ + ___PPC_RA(base) | ((i) & 0xfffc)) +#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) + + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *ins, char *result) +{ + pr_info("%-14s : %s\n", ins, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(®s); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx ***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + + /* + * Don't touch 'a' here. Touching 'a' can do Load/store + * of 'a' which result in failure of subsequent stdcx. + * Instead, use hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + + /* + * Two possible scenarios that indicates successful emulation + * of stdcx. : + * 1. Reservation is active and store is performed. In this + * case cr0.eq bit will be set to 1. + * 2. Reservation is not active and store is not performed. + * In this case cr0.eq bit will be set to 0. + */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + /* stvx vrs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + + if (stepped == 1) + show_result("lxvd2x", "PASS"); + else + show_result("lxvd2x", "FAIL"); + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stxvd2x", "PASS"); + else + show_result("stxvd2x", "FAIL"); +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static int __init test_emulate_step(void) +{ + test_ld(); + test_lwz(); + test_lwzx(); + test_std(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_lfdx_stfdx(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + + return 0; +} +late_initcall(test_emulate_step); -- cgit From 7a70d7288c926ae88e0c773fbb506aa374e99c2d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Feb 2017 14:32:41 +1100 Subject: powerpc/64: Invalidate process table caching after setting process table The POWER9 MMU reads and caches entries from the process table. When we kexec from one kernel to another, the second kernel sets its process table pointer but doesn't currently do anything to make the CPU invalidate any cached entries from the old process table. This adds a tlbie (TLB invalidate entry) instruction with parameters to invalidate caching of the process table after the new process table is installed. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable-radix.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index feeda90cd06d..01c94f141164 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) -- cgit From 424f8acd328a111319ae30bf384e5dfb9bc8f8cb Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Mon, 27 Feb 2017 11:10:07 +0530 Subject: powerpc/powernv: Fix bug due to labeling ambiguity in power_enter_stop Commit 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") added additional code in power_enter_stop() to distinguish between stop requests whose PSSCR had ESL=EC=1 from those which did not. When ESL=EC=1, we do a forward-jump to a location labelled by "1", which had the code to handle the ESL=EC=1 case. Unfortunately just a couple of instructions before this label, is the macro IDLE_STATE_ENTER_SEQ() which also has a label "1" in its expansion. As a result, the current code can result in directly executing stop instruction for deep stop requests with PSSCR ESL=EC=1, without saving the hypervisor state. Fix this BUG by labeling the location that handles ESL=EC=1 case with a more descriptive label ".Lhandle_esl_ec_set" (local label suggestion a la .Lxx from Anton Blanchard). While at it, rename the label "2" labelling the location of the code handling entry into deep stop states with ".Lhandle_deep_stop". For a good measure, change the label in IDLE_STATE_ENTER_SEQ() macro to an not-so commonly used value in order to avoid similar mishaps in the future. Fixes: 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 4 ++-- arch/powerpc/kernel/idle_book3s.S | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index fd321eb423cb..155731557c9b 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmpd cr0,r0,r0; \ - bne 1b; \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ IDLE_INST; \ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5f61cc0349c0..995728736677 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -276,19 +276,21 @@ power_enter_stop: */ andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne 1f + bne .Lhandle_esl_ec_set IDLE_STATE_ENTER_SEQ(PPC_STOP) li r3,0 /* Since we didn't lose state, return 0 */ b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: /* * Check if the requested state is a deep idle state. */ -1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f + bge .Lhandle_deep_stop IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -2: +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to -- cgit From 4dc831aa88132f835cefe876aa0206977c4d7710 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2016 13:46:20 +1100 Subject: powerpc: Fix compiling a BE kernel with a powerpc64le toolchain GCC can compile with either endian, but the default ABI version is set based on the default endianness of the toolchain. Alan Modra says: you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc generate powerpc64 code The opposite is true for powerpc64 when generating -mlittle it requires -mabi=elfv2 to generate v2 ABI, which we were already doing. This change adds ABI annotations together with endianness for all cases, LE and BE. This fixes the case of building a BE kernel with a toolchain that is LE by default. Signed-off-by: Nicholas Piggin Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 31286fa7873c..19b0d1a81959 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) -- cgit From 3fb66a70a4ae886445743354e4b60e54058bb3ff Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 16 Feb 2017 09:11:29 -0600 Subject: powerpc/booke: Fix boot crash due to null hugepd On 32-bit book-e machines, hugepd_ok() no longer takes into account null hugepd values, causing this crash at boot: Unable to handle kernel paging request for data at address 0x80000000 ... NIP [c0018378] follow_huge_addr+0x38/0xf0 LR [c001836c] follow_huge_addr+0x2c/0xf0 Call Trace: follow_huge_addr+0x2c/0xf0 (unreliable) follow_page_mask+0x40/0x3e0 __get_user_pages+0xc8/0x450 get_user_pages_remote+0x8c/0x250 copy_strings+0x110/0x390 copy_strings_kernel+0x2c/0x50 do_execveat_common+0x478/0x630 do_execve+0x2c/0x40 try_to_run_init_process+0x18/0x60 kernel_init+0xbc/0x110 ret_from_kernel_thread+0x5c/0x64 This impacts all nxp (ex-freescale) 32-bit booke platforms. This was caused by the change of hugepd_t.pd from signed to unsigned, and the update to the nohash version of hugepd_ok(). Previously hugepd_ok() could exclude all non-huge and NULL pgds using > 0, whereas now we need to explicitly check that the value is not zero and also that PD_HUGE is *clear*. This isn't protected by the pgd_none() check in __find_linux_pte_or_hugepte() because on 32-bit we use pgtable-nopud.h, which causes the pgd_none() check to be always false. Fixes: 20717e1ff526 ("powerpc/mm: Fix little-endian 4K hugetlb") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Madalin-Cristian Bucur Signed-off-by: Laurentiu Tudor [mpe: Flesh out change log details.] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 0cd8a3852763..e5805ad78e12 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd) return ((hpd_val(hpd) & 0x4) != 0); #else /* We clear the top bit to indicate hugepd */ - return ((hpd_val(hpd) & PD_HUGE) == 0); + return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); #endif } -- cgit From 2a9c4f40ab2c281f95d41577ff0f7f78668feb73 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 2 Mar 2017 17:41:24 +1100 Subject: powerpc/powernv: Fix opal tracepoints with JUMP_LABEL=n The recent commit to allow calling OPAL calls in real mode, commit ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls"), introduced a bug when CONFIG_JUMP_LABEL=n. The commit moved the "mfmsr r12" prior to the call to OPAL_BRANCH, but we missed that OPAL_BRANCH clobbers r12 when jump labels are disabled. This leads to us using the tracepoint refcount as the MSR value, typically zero, and saving that into PACASAVEDMSR. When we return from OPAL we use that value as the MSR value for rfid, meaning we switch to 32-bit BE real mode - hilarity ensues. Fix it by using r11 in OPAL_BRANCH, which is not live at the time the macro is used in OPAL_CALL. Fixes: ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls") Suggested-by: Paul Mackerras Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-wrappers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6693f75e93d1..da8a0f7a035c 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -39,8 +39,8 @@ opal_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,opal_tracepoint_refcount@toc(r2); \ - cmpdi r12,0; \ + ld r11,opal_tracepoint_refcount@toc(r2); \ + cmpdi r11,0; \ bne- LABEL; \ 1: -- cgit From 6ad966d7303b70165228dba1ee8da1a05c10eefe Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Sat, 4 Feb 2017 17:03:40 +0800 Subject: powerpc/64: Fix checksum folding in csum_add() Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold") missed a case in csum_add(). Fix it. Signed-off-by: Shile Zhang Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 4e63787dc3be..842124b199b5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" -- cgit From 68925176296a8b995e503349200e256674bfe5ac Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Feb 2017 14:32:18 +0000 Subject: arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs When invalidating guest TLBs, special care must be taken to actually shoot the guest TLBs and not the host ones if we're running on a VHE system. This is controlled by the HCR_EL2.TGE bit, which we forget to clear before invalidating TLBs. Address the issue by introducing two wrappers (__tlb_switch_to_guest and __tlb_switch_to_host) that take care of both the VTTBR_EL2 and HCR_EL2.TGE switching. Reported-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Reviewed-by: Christoffer Dall Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/tlb.c | 64 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index e8e7ba2bc11f..9e1d2b75eecd 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -18,14 +18,62 @@ #include #include +static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) +{ + u64 val; + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + */ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); +} + +static hyp_alternate_select(__tlb_switch_to_guest, + __tlb_switch_to_guest_nvhe, + __tlb_switch_to_guest_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); +} + +static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__tlb_switch_to_host, + __tlb_switch_to_host_nvhe, + __tlb_switch_to_host_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); /* * We could do so much better if we had the VA as well. @@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalls12e1is); dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalle1); dsb(nsh); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_flush_vm_context(void) -- cgit From a69e2fb70350a66f91175cd2625f1e8215c5b6e9 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 3 Mar 2017 11:58:44 +1100 Subject: powerpc/xics: Work around limitations of OPAL XICS priority handling The CPPR (Current Processor Priority Register) of a XICS interrupt presentation controller contains a value N, such that only interrupts with a priority "more favoured" than N will be received by the CPU, where "more favoured" means "less than". So if the CPPR has the value 5 then only interrupts with a priority of 0-4 inclusive will be received. In theory the CPPR can support a value of 0 to 255 inclusive. In practice Linux only uses values of 0, 4, 5 and 0xff. Setting the CPPR to 0 rejects all interrupts, setting it to 0xff allows all interrupts. The values 4 and 5 are used to differentiate IPIs from external interrupts. Setting the CPPR to 5 allows IPIs to be received but not external interrupts. The CPPR emulation in the OPAL XICS implementation only directly supports priorities 0 and 0xff. All other priorities are considered equivalent, and mapped to a single priority value internally. This means when using icp-opal we can not allow IPIs but not externals. This breaks Linux's use of priority values when a CPU is hot unplugged. After migrating IRQs away from the CPU that is being offlined, we set the priority to 5, meaning we still want the offline CPU to receive IPIs. But the effect of the OPAL XICS emulation's use of a single priority value is that all interrupts are rejected by the CPU. With the CPU offline, and not receiving IPIs, we may not be able to wake it up to bring it back online. The first part of the fix is in icp_opal_set_cpu_priority(). CPPR values of 0 to 4 inclusive will correctly cause all interrupts to be rejected, so we pass those CPPR values through to OPAL. However if we are called with a CPPR of 5 or greater, the caller is expecting to be able to allow IPIs but not external interrupts. We know this doesn't work, so instead of rejecting all interrupts we choose the opposite which is to allow all interrupts. This is still not correct behaviour, but we know for the only existing caller (xics_migrate_irqs_away()), that it is the better option. The other part of the fix is in xics_migrate_irqs_away(). Instead of setting priority (CPPR) to 0, and then back to 5 before migrating IRQs, we migrate the IRQs before setting the priority back to 5. This should have no effect on an ICP backend with a working set_priority(), and on icp-opal it means we will keep all interrupts blocked until after we've finished doing the IRQ migration. Additionally we wait for 5ms after doing the migration to make sure there are no IRQs in flight. Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Cc: stable@vger.kernel.org # v4.8+ Suggested-by: Michael Ellerman Reported-by: Vaidyanathan Srinivasan Tested-by: Vaidyanathan Srinivasan Signed-off-by: Balbir Singh [mpe: Rewrote comments and change log, change delay to 5ms] Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-opal.c | 10 ++++++++++ arch/powerpc/sysdev/xics/xics-common.c | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index f9670eabfcfa..b53f80f0b4d8 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPI's and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac7..23efe4e42172 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any inflight IRQ's */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts, the expectation is that we'll only get woken up by an IPI + * interrupt beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */ -- cgit From 12cc9fd6b2d8ee307a735b3b9faed0d17b719463 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:47 +1100 Subject: powerpc: Parse the command line before calling CAS On POWER9 the hypervisor requires the guest to decide whether it would like to use a hash or radix mmu model at the time it calls ibm,client-architecture-support (CAS) based on what the hypervisor has said it's allowed to do. It is possible to disable radix by passing "disable_radix" on the command line. The next patch will add support for the new CAS format, thus we need to parse the command line before calling CAS so we can correctly select which mmu we would like to use. Signed-off-by: Suraj Jitindar Singh Reviewed-by: Paul Mackerras Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a3944540fe0d..bf3966d12565 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2993,6 +2993,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -3008,11 +3013,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, if (of_platform != PLATFORM_POWERMAC) copy_and_flush(0, kbase, 0x100, 0); - /* - * Do early parsing of command line - */ - early_cmdline_parse(); - /* * Initialize memory management within prom_init */ -- cgit From 014d02cbf16b3106dc8e93281d2a9c189751ed5e Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:48 +1100 Subject: powerpc: Update to new option-vector-5 format for CAS On POWER9 the ibm,client-architecture-support (CAS) negotiation process has been updated to change how the host to guest negotiation is done for the new hash/radix mmu as well as the nest mmu, process tables and guest translation shootdown (GTSE). This is documented in the unreleased PAPR ACR "CAS option vector additions for P9". The host tells the guest which options it supports in ibm,arch-vec-5-platform-support. The guest then chooses a subset of these to request in the CAS call and these are agreed to in the ibm,architecture-vec-5 property of the chosen node. Thus we read ibm,arch-vec-5-platform-support and make our selection before calling CAS. We then parse the ibm,architecture-vec-5 property of the chosen node to check whether we should run as hash or radix. ibm,arch-vec-5-platform-support format: index value pairs: ... index: Option vector 5 byte number val: Some representation of supported values Signed-off-by: Suraj Jitindar Singh Acked-by: Paul Mackerras [mpe: Don't print about unknown options, be consistent with OV5_FEAT] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 18 ++++--- arch/powerpc/kernel/prom_init.c | 110 +++++++++++++++++++++++++++++++++++++++- arch/powerpc/mm/init_64.c | 36 ++++++++++--- 3 files changed, 150 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 4a90634e8322..35c00d7a0cf8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,12 +160,18 @@ struct of_drconf_cell { #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ -#define OV5_MMU_SLB 0x1800 /* always use SLB */ -#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ +/* MMU Base Architecture */ +#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ +#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ +#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */ +#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */ +#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */ +#define OV5_NMMU 0x1820 /* Nest MMU Available */ +/* Hash Table Extensions */ +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */ +#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ +/* Radix Table Extensions */ +#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bf3966d12565..1c1b44ec7642 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -695,6 +709,8 @@ struct option_vector5 { u8 byte22; u8 intarch; u8 mmu; + u8 hash_ext; + u8 radix_ext; } __packed; struct option_vector6 { @@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved3 = 0, .subprocessors = 1, .intarch = 0, - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | - OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, }, /* option vector 6: IBM PAPR hints */ @@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. + */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { @@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void) prom_arg_t ret; u32 cores; + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); + root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* We need to tell the FW about the number of cores we support. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa3b76aa0d6..9be992083d2a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,18 +356,42 @@ static void early_check_vec5(void) unsigned long root, chosen; int size; const u8 *vec5; + u8 mmu_supported; root = of_get_flat_dt_root(); chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); - if (chosen == -FDT_ERR_NOTFOUND) + if (chosen == -FDT_ERR_NOTFOUND) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; + } vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); - if (!vec5) + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; - if (size <= OV5_INDX(OV5_MMU_RADIX_300) || - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) - /* Hypervisor doesn't support radix */ + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } } void __init mmu_early_init_devtree(void) @@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + if (!(mfmsr() & MSR_HV)) early_check_vec5(); if (early_radix_enabled()) -- cgit From 6ba422c75facb1b1e0e206c464ee121b8073f7e0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 5 Mar 2017 10:54:34 +1100 Subject: powerpc/64: Avoid panic during boot due to divide by zero in init_cache_info() I see a panic in early boot when building with a recent gcc toolchain. The issue is a divide by zero, which is undefined. Older toolchains let us get away with it: int foo(int a) { return a / 0; } foo: li 9,0 divw 3,3,9 extsw 3,3 blr But newer ones catch it: foo: trap Add a check to avoid the divide by zero. Fixes: e2827fe5c156 ("powerpc/64: Clean up ppc64_caches using a struct per cache") Signed-off-by: Anton Blanchard Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index adf2084f214b..9cfaa8b69b5f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, info->line_size = lsize; info->block_size = bsize; info->log_block_size = __ilog2(bsize); - info->blocks_per_page = PAGE_SIZE / bsize; + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; if (sets == 0) info->assoc = 0xffff; -- cgit From 3b3e78552d3077ec70d2640e629e07e3ab416a6a Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 5 Mar 2017 19:16:44 +0100 Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA/W will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1488737804-20681-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 01c9cda3e1b7..4194d6f9bb29 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -231,6 +231,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TAW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ -- cgit From f2853308b6409b97799b0beceacd9da43a82fe43 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Mar 2017 10:57:48 +0200 Subject: x86/build/x86_64_defconfig: Enable CONFIG_R8169 Very common PCIe ethernet card. Already enabled in i386_defconfig. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Cc: Konstantin Khlebnikov Link: http://lkml.kernel.org/r/20170306085748.85957-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/configs/x86_64_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7ef4a099defc..6205d3b81e6d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -176,6 +176,7 @@ CONFIG_E1000E=y CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y +CONFIG_R8169=y CONFIG_FDDI=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set -- cgit From 9c7a00868c3a77c86ab07a2c51f3bb4897bd8550 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 21:51:32 +1100 Subject: powerpc/64: Fix L1D cache shape vector reporting L1I values It seems we didn't pay quite enough attention when testing the new cache shape vectors, which means we didn't notice the bug where the vector for the L1D was using the L1I values. Fix it, resulting in eg: L1I cache size: 0x8000 32768B 32K L1I line size: 0x80 8-way associative L1D cache size: 0x10000 65536B 64K L1D line size: 0x80 8-way associative Fixes: 98a5f361b862 ("powerpc: Add new cache geometry aux vectors") Cut-and-paste-bug-by: Benjamin Herrenschmidt Badly-reviewed-by: Michael Ellerman Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/elf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 93b9b84568e8..09bde6e34f5d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define ARCH_DLINFO_CACHE_GEOMETRY \ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ - NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ - NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ -- cgit From a7d2475af7aedcb9b5c6343989a8bfadbf84429b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 22:53:59 +1100 Subject: powerpc: Sort the selects under CONFIG_PPC We have a big list of selects under CONFIG_PPC, and currently they're completely unsorted. This means people tend to add new selects at the bottom of the list, and so two commits which both add a new select will often conflict. Instead sort it alphabetically. This is nicer in and of itself, but also means two commits that add a new select will have a greater chance of not conflicting. Add a note at the top and bottom asking people to keep it sorted. And while we're here pad out the 'if' expressions to make them stand out. Suggested-by: Stephen Rothwell Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 138 +++++++++++++++++++++++++++------------------------ 1 file changed, 72 insertions(+), 66 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 494091762bd7..97a8bc8a095c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y - select BUILDTIME_EXTABLE_SORT + # + # Please keep this list sorted alphabetically. + # + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SG_CHAIN + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_ELF - select ARCH_HAS_ELF_RANDOMIZE - select OF - select OF_EARLY_FLATTREE - select OF_RESERVED_MEM - select HAVE_FTRACE_MCOUNT_RECORD + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_ATOMIC64 if PPC32 + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL_OLD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_CBPF_JIT if !PPC64 + select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_FUNCTION_TRACER + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_EBPF_JIT if PPC64 + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS - select SYSCTL_EXCEPTION_TRACE - select VIRT_TO_BUS if !PPC64 + select HAVE_GENERIC_RCU_GUP + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_KERNEL_GZIP select HAVE_KPROBES - select HAVE_OPTPROBES if PPC64 - select HAVE_ARCH_KGDB select HAVE_KRETPROBES - select HAVE_ARCH_TRACEHOOK + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI if PERF_EVENTS select HAVE_OPROFILE - select HAVE_DEBUG_KMEMLEAK - select ARCH_HAS_SG_CHAIN - select GENERIC_ATOMIC64 if PPC32 + select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_RCU_TABLE_FREE if SMP select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select ARCH_WANT_IPC_PARSE_VERSION - select SPARSE_IRQ + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING select IRQ_DOMAIN - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_SHOW_LEVEL select IRQ_FORCED_THREADING - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT if !PPC64 - select HAVE_EBPF_JIT if PPC64 - select HAVE_ARCH_JUMP_LABEL - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_HAS_GCOV_PROFILE_ALL - select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE - select GENERIC_TIME_VSYSCALL_OLD - select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS - select ARCH_USE_BUILTIN_BSWAP - select OLD_SIGSUSPEND - select OLD_SIGACTION if PPC32 - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM - select HAVE_GENERIC_RCU_GUP - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_NMI if PERF_EVENTS - select EDAC_SUPPORT - select EDAC_ATOMIC_SCRUB - select ARCH_HAS_DMA_SET_COHERENT_MASK - select ARCH_HAS_DEVMEM_IS_ALLOWED - select HAVE_ARCH_SECCOMP_FILTER - select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select GENERIC_CPU_AUTOPROBE - select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_KERNEL_GZIP - select HAVE_CONTEXT_TRACKING if PPC64 + select OF + select OF_EARLY_FLATTREE + select OF_RESERVED_MEM + select OLD_SIGACTION if PPC32 + select OLD_SIGSUSPEND + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select VIRT_TO_BUS if !PPC64 + # + # Please keep this list sorted alphabetically. + # config GENERIC_CSUM def_bool n -- cgit From 587d7e72aedca91cee80c0a56811649c3efab765 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 2 Mar 2017 12:41:48 -0800 Subject: kvm: nVMX: VMCLEAR should not cause the vCPU to shut down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMCLEAR should silently ignore a failure to clear the launch state of the VMCS referenced by the operand. Signed-off-by: Jim Mattson [Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 283aa8601833..3b626d6dc3ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); -- cgit From 1fbdbcea80056acfc8c8506594744f5ec52208c1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 17:05:57 -0800 Subject: avr32: Fix build error caused by include file reshuffling Various avr32 builds fail: arch/avr32/oprofile/backtrace.c:58: error: dereferencing pointer to incomplete type arch/avr32/oprofile/backtrace.c:60: error: implicit declaration of function 'user_mode' Signed-off-by: Guenter Roeck Acked-by: Hans-Christian Noren Egtvedt Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Cc: Thomas Gleixner Cc: oprofile-list@lists.sf.net Fixes: f780d89a0e82 ("sched/headers: Remove from ...") Link: http://lkml.kernel.org/r/1488762357-4500-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/avr32/oprofile/backtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c index 75d9ad6f99cf..29cf2f191bfd 100644 --- a/arch/avr32/oprofile/backtrace.c +++ b/arch/avr32/oprofile/backtrace.c @@ -14,7 +14,7 @@ */ #include -#include +#include #include /* The first two words of each frame on the stack look like this if we have -- cgit From 80aa1a54f054a1c71cc88e0cad6cfa0d20a10f23 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 10:27:14 -0800 Subject: h8300: Fix build breakage caused by header file changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following h8300 build failures: arch/h8300/kernel/ptrace_h.c: In function ‘trace_trap’: arch/h8300/kernel/ptrace_h.c:253:3: error: implicit declaration of function ‘force_sig’ Signed-off-by: Guenter Roeck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: uclinux-h8-devel@lists.sourceforge.jp Fixes: c3edc4010e9d ("sched/headers: Move task_struct::signal and ...") Link: http://lkml.kernel.org/r/1488738434-3504-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/h8300/kernel/ptrace_h.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c index fe3b5673baba..f5ff3b794c85 100644 --- a/arch/h8300/kernel/ptrace_h.c +++ b/arch/h8300/kernel/ptrace_h.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #define BREAKINST 0x5730 /* trapa #3 */ -- cgit From 90922a2d03d84de36bf8a9979d62580102f31a92 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Tue, 7 Mar 2017 08:20:38 -0600 Subject: irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065 On Qualcomm Datacenter Technologies QDF2400 SoCs, the ITS hardware implementation uses 16Bytes for Interrupt Translation Entry (ITE), but reports an incorrect value of 8Bytes in GITS_TYPER.ITTE_size. It might cause kernel memory corruption depending on the number of MSI(x) that are configured and the amount of memory that has been allocated for ITEs in its_create_device(). This patch fixes the potential memory corruption by setting the correct ITE size to 16Bytes. Cc: stable@vger.kernel.org Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- arch/arm64/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414e..8c7c244247b6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009 If unsure, say Y. +config QCOM_QDF2400_ERRATUM_0065 + bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size" + default y + help + On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports + ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have + been indicated as 16Bytes (0xf), not 8Bytes (0x7). + + If unsure, say Y. + endmenu -- cgit From 2f707d97982286b307ef2a9b034e19aabc1abb56 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 6 Mar 2017 04:03:28 -0800 Subject: KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: WARNING: CPU: 1 PID: 27742 at arch/x86/kvm/vmx.c:11029 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 CPU: 1 PID: 27742 Comm: a.out Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 vmx_leave_nested arch/x86/kvm/vmx.c:11136 [inline] vmx_set_msr+0x1565/0x1910 arch/x86/kvm/vmx.c:3324 kvm_set_msr+0xd4/0x170 arch/x86/kvm/x86.c:1099 do_set_msr+0x11e/0x190 arch/x86/kvm/x86.c:1128 __msr_io arch/x86/kvm/x86.c:2577 [inline] msr_io+0x24b/0x450 arch/x86/kvm/x86.c:2614 kvm_arch_vcpu_ioctl+0x35b/0x46a0 arch/x86/kvm/x86.c:3497 kvm_vcpu_ioctl+0x232/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2721 vfs_ioctl fs/ioctl.c:43 [inline] do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683 SYSC_ioctl fs/ioctl.c:698 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689 entry_SYSCALL_64_fastpath+0x1f/0xc2 The syzkaller folks reported a nested_run_pending warning during userspace clear VMX capability which is exposed to L1 before. The warning gets thrown while doing (*(uint32_t*)0x20aecfe8 = (uint32_t)0x1); (*(uint32_t*)0x20aecfec = (uint32_t)0x0); (*(uint32_t*)0x20aecff0 = (uint32_t)0x3a); (*(uint32_t*)0x20aecff4 = (uint32_t)0x0); (*(uint64_t*)0x20aecff8 = (uint64_t)0x0); r[29] = syscall(__NR_ioctl, r[4], 0x4008ae89ul, 0x20aecfe8ul, 0, 0, 0, 0, 0, 0); i.e. KVM_SET_MSR ioctl with struct kvm_msrs { .nmsrs = 1, .pad = 0, .entries = { {.index = MSR_IA32_FEATURE_CONTROL, .reserved = 0, .data = 0} } } The VMLANCH/VMRESUME emulation should be stopped since the CPU is going to reset here. This patch resets the nested_run_pending since the CPU is going to be reset hence there should be nothing pending. Reported-by: Dmitry Vyukov Suggested-by: Radim Krčmář Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: David Hildenbrand Signed-off-by: Wanpeng Li Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3b626d6dc3ac..ab338581b3ec 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11107,8 +11107,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); } -- cgit From f050fe7a9164945dd1c28be05bf00e8cfb082ccf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 20 Feb 2017 12:30:11 +0000 Subject: arm: KVM: Survive unknown traps from guests Currently we BUG() if we see a HSR.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise. While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0406C.c, all currently unallocated HSR EC encodings are reserved, and per ARM DDI 0487A.k_iss10775, page G6-4395, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions. The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with with a new (opt-in) exit to the host userspace. Cc: Dave Martin Cc: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/kvm/handle_exit.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index e22089fb44dc..a3f0b3d50089 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -209,6 +209,7 @@ #define HSR_EC_IABT_HYP (0x21) #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_EC_MAX (0x3f) #define HSR_WFI_IS_WFE (_AC(1, UL) << 0) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 4e40d1955e35..96af65a30d78 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n", + hsr); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec, [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, @@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } -- cgit From ba4dd156eabdca93501d92a980ba27fa5f4bbd27 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 20 Feb 2017 12:30:12 +0000 Subject: arm64: KVM: Survive unknown traps from guests Currently we BUG() if we see an ESR_EL2.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise. While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0487A.k_iss10775, page D7-1937, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions. The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with with a new (opt-in) exit to the host userspace. Cc: Dave Martin Cc: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 1bfe30dfbfe7..fa1b18e364fc 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, @@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) u32 hsr = kvm_vcpu_get_hsr(vcpu); u8 hsr_ec = ESR_ELx_EC(hsr); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } -- cgit From fc69910f329d61821897871e0e957eda39beb3d8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Mar 2017 08:29:31 +0100 Subject: MIPS: Add missing include files After the split of linux/sched.h, several platforms in arch/mips stopped building. Add the respective additional #include statements to fix the problem I first tried adding these into asm/processor.h, but ran into circular header dependencies with that which I could not figure out. The commit I listed as causing the problem is the branch merge, as there is likely a combination of multiple patches in that branch. Signed-off-by: Arnd Bergmann Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: ralf@linux-mips.org Fixes: 1827adb11ad2 ("Merge branch 'WIP.sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") Link: http://lkml.kernel.org/r/20170308072931.3836696-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/mips/cavium-octeon/cpu.c | 2 ++ arch/mips/cavium-octeon/crypto/octeon-crypto.c | 1 + arch/mips/cavium-octeon/smp.c | 1 + arch/mips/include/asm/fpu.h | 1 + arch/mips/kernel/smp-bmips.c | 1 + arch/mips/kernel/smp-mt.c | 1 + arch/mips/loongson64/loongson-3/cop2-ex.c | 1 + arch/mips/netlogic/common/smp.c | 1 + arch/mips/netlogic/xlp/cop2-ex.c | 3 +++ arch/mips/sgi-ip22/ip28-berr.c | 1 + arch/mips/sgi-ip27/ip27-berr.c | 2 ++ arch/mips/sgi-ip27/ip27-smp.c | 3 +++ arch/mips/sgi-ip32/ip32-berr.c | 1 + arch/mips/sgi-ip32/ip32-reset.c | 1 + 14 files changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c index a5b427909b5c..036d56cc4591 100644 --- a/arch/mips/cavium-octeon/cpu.c +++ b/arch/mips/cavium-octeon/cpu.c @@ -10,7 +10,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c index 4d22365844af..cfb4a146cf17 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "octeon-crypto.h" diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4b94b7fbafa3..3de786545ded 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 321752bcbab6..f94455f964ec 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -12,6 +12,7 @@ #include #include +#include #include #include diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 3daa2cae50b0..1b070a76fcdd 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index e077ea3e11fb..e398cbc3d776 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c index ea13764d0a03..621d6af5f6eb 100644 --- a/arch/mips/loongson64/loongson-3/cop2-ex.c +++ b/arch/mips/loongson64/loongson-3/cop2-ex.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 10d86d54880a..bddf1ef553a4 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/netlogic/xlp/cop2-ex.c b/arch/mips/netlogic/xlp/cop2-ex.c index 52bc5de42005..21e439b3db70 100644 --- a/arch/mips/netlogic/xlp/cop2-ex.c +++ b/arch/mips/netlogic/xlp/cop2-ex.c @@ -9,11 +9,14 @@ * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle */ +#include #include #include #include #include +#include #include +#include #include #include diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 1f2a5bc4779e..75460e1e106b 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index d12879eb2b1f..83efe03d5c60 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -12,7 +12,9 @@ #include /* for SIGBUS */ #include /* schow_regs(), force_sig() */ #include +#include +#include #include #include #include diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index f5ed45e8f442..4cd47d23d81a 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -8,10 +8,13 @@ */ #include #include +#include #include #include + #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index 57d8c7486fe6..c1f12a9cf305 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 8bd415c8729f..b3b442def423 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include -- cgit From 672a2c87c83649fb0167202342ce85af9a3b4f1c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 14:56:05 +0100 Subject: axonram: Fix gendisk handling It is invalid to call del_gendisk() when disk->queue is NULL. Fix error handling in axon_ram_probe() to avoid doing that. Also del_gendisk() does not drop a reference to gendisk allocated by alloc_disk(). That has to be done by put_disk(). Add that call where needed. Reported-by: Dan Carpenter Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- arch/powerpc/sysdev/axonram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e2..f523ac883150 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ failed: if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank); -- cgit From 7af4df85796589e60a2dfc0f821eca0c4bbce4d2 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:33 +0530 Subject: KVM: arm/arm64: Enable KVM_CAP_NR_MEMSLOTS on arm/arm64 Return KVM_USER_MEM_SLOTS for userspace capability query on NR_MEMSLOTS. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm/kvm/arm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c9a2103faeb9..96dba7cd8be7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_MSI_DEVID: if (!kvm) r = -EINVAL; -- cgit From 3e92f94a3b8e925a6dd7ec88a5794b2084b5fb65 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:34 +0530 Subject: KVM: arm/arm64: Remove KVM_PRIVATE_MEM_SLOTS definition that are unused arm/arm64 architecture doesnt use private memslots, hence removing KVM_PRIVATE_MEM_SLOTS macro definition. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 1 - arch/arm64/include/asm/kvm_host.h | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index cc495d799c67..31ee468ce667 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -30,7 +30,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f21fd3894370..6ac17ee887c9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -31,7 +31,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 -- cgit From 955a3fc6d2a1c11d6d00bce4f3816100ce0530cf Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:35 +0530 Subject: KVM: arm64: Increase number of user memslots to 512 Having only 32 memslots is a real constraint for the maximum number of PCI devices that can be assigned to a single guest. Assuming each PCI device/virtual function having two memory BAR regions, we could assign only 15 devices/virtual functions to a guest. Hence increase KVM_USER_MEM_SLOTS to 512 as done in other archs like powerpc. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6ac17ee887c9..e7705e7bb07b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,7 +30,7 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#define KVM_USER_MEM_SLOTS 32 +#define KVM_USER_MEM_SLOTS 512 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 -- cgit From 05d8d34611139f8435af90ac54b65eb31e82e388 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Tue, 7 Mar 2017 17:51:49 +0100 Subject: KVM: nVMX: do not warn when MSR bitmap address is not backed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before trying to do nested_get_page() in nested_vmx_merge_msr_bitmap(), we have already checked that the MSR bitmap address is valid (4k aligned and within physical limits). SDM doesn't specify what happens if the there is no memory mapped at the valid address, but Intel CPUs treat the situation as if the bitmap was configured to trap all MSRs. KVM already does that by returning false and a correct handling doesn't need the guest-trigerrable warning that was reported by syzkaller: (The warning was originally there to catch some possible bugs in nVMX.) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 7832 Comm: syz-executor1 Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 enter_vmx_non_root_mode arch/x86/kvm/vmx.c:10471 [inline] nested_vmx_run+0x6186/0xaab0 arch/x86/kvm/vmx.c:10561 handle_vmlaunch+0x1a/0x20 arch/x86/kvm/vmx.c:7312 vmx_handle_exit+0xfc0/0x3f00 arch/x86/kvm/vmx.c:8526 vcpu_enter_guest arch/x86/kvm/x86.c:6982 [inline] vcpu_run arch/x86/kvm/x86.c:7044 [inline] kvm_arch_vcpu_ioctl_run+0x1418/0x4840 arch/x86/kvm/x86.c:7205 kvm_vcpu_ioctl+0x673/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2570 Reported-by: Dmitry Vyukov Reviewed-by: Jim Mattson [Jim Mattson explained the bare metal behavior: "I believe this behavior would be documented in the chipset data sheet rather than the SDM, since the chipset returns all 1s for an unclaimed read."] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab338581b3ec..98e82ee1e699 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9680,10 +9680,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) { - WARN_ON(1); + if (!page) return false; - } msr_bitmap_l1 = (unsigned long *)kmap(page); memset(msr_bitmap_l0, 0xff, PAGE_SIZE); -- cgit From 6fb895692a034393d58679cd8d00c9e229719a5f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:02 +0300 Subject: x86/cpufeature: Add 5-level paging detection Look for 'la57' in /proc/cpuinfo to see if your machine supports 5-level paging. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/x86/include/asm/cpufeatures.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4e7772387c6e..b04bb6dfed7f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ -- cgit From 9849a5697d3defb2087cb6b9be5573a142697889 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:05 +0300 Subject: arch, mm: convert all architectures to use 5level-fixup.h If an architecture uses 4level-fixup.h we don't need to do anything as it includes 5level-fixup.h. If an architecture uses pgtable-nop*d.h, define __ARCH_USE_5LEVEL_HACK before inclusion of the header. It makes asm-generic code to use 5level-fixup.h. If an architecture has 4-level paging or folds levels on its own, include 5level-fixup.h directly. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/arc/include/asm/hugepage.h | 1 + arch/arc/include/asm/pgtable.h | 1 + arch/arm/include/asm/pgtable.h | 1 + arch/arm64/include/asm/pgtable-types.h | 4 ++++ arch/avr32/include/asm/pgtable-2level.h | 1 + arch/cris/include/asm/pgtable.h | 1 + arch/frv/include/asm/pgtable.h | 1 + arch/h8300/include/asm/pgtable.h | 1 + arch/hexagon/include/asm/pgtable.h | 1 + arch/ia64/include/asm/pgtable.h | 2 ++ arch/metag/include/asm/pgtable.h | 1 + arch/microblaze/include/asm/page.h | 3 ++- arch/mips/include/asm/pgtable-32.h | 1 + arch/mips/include/asm/pgtable-64.h | 1 + arch/mn10300/include/asm/page.h | 1 + arch/nios2/include/asm/pgtable.h | 1 + arch/openrisc/include/asm/pgtable.h | 1 + arch/powerpc/include/asm/book3s/32/pgtable.h | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 3 +++ arch/powerpc/include/asm/nohash/32/pgtable.h | 1 + arch/powerpc/include/asm/nohash/64/pgtable-4k.h | 3 +++ arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 1 + arch/s390/include/asm/pgtable.h | 1 + arch/score/include/asm/pgtable.h | 1 + arch/sh/include/asm/pgtable-2level.h | 1 + arch/sh/include/asm/pgtable-3level.h | 1 + arch/sparc/include/asm/pgtable_64.h | 1 + arch/tile/include/asm/pgtable_32.h | 1 + arch/tile/include/asm/pgtable_64.h | 1 + arch/um/include/asm/pgtable-2level.h | 1 + arch/um/include/asm/pgtable-3level.h | 1 + arch/unicore32/include/asm/pgtable.h | 1 + arch/x86/include/asm/pgtable_types.h | 4 ++++ arch/xtensa/include/asm/pgtable.h | 1 + 34 files changed, 46 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 317ff773e1ca..b18fcb606908 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -11,6 +11,7 @@ #define _ASM_ARC_HUGEPAGE_H #include +#define __ARCH_USE_5LEVEL_HACK #include static inline pte_t pmd_pte(pmd_t pmd) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e94ca72b974e..ee22d40afef4 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -37,6 +37,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index a8d656d9aec7..1c462381c225 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -20,6 +20,7 @@ #else +#define __ARCH_USE_5LEVEL_HACK #include #include #include diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 69b2fd41503c..345a072b5856 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 +#define __ARCH_USE_5LEVEL_HACK #include #elif CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include +#elif CONFIG_PGTABLE_LEVELS == 4 +#include #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h index 425dd567b5b9..d5b1c63993ec 100644 --- a/arch/avr32/include/asm/pgtable-2level.h +++ b/arch/avr32/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H #define __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 2a3210ba4c72..fa3a73004cc5 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _CRIS_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index a0513d463a1f..ab6e7e961b54 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef _ASM_PGTABLE_H #define _ASM_PGTABLE_H +#include #include #include #include diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h index 8341db67821d..7d265d28ba5e 100644 --- a/arch/h8300/include/asm/pgtable.h +++ b/arch/h8300/include/asm/pgtable.h @@ -1,5 +1,6 @@ #ifndef _H8300_PGTABLE_H #define _H8300_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include #define pgtable_cache_init() do { } while (0) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 49eab8136ec3..24a9177fb897 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -26,6 +26,7 @@ */ #include #include +#define __ARCH_USE_5LEVEL_HACK #include /* A handy thing to have if one has the RAM. Declared in head.S */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 384794e665fc..6cc22c8d8923 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr; #if CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include #endif +#include #include #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index ffa3a3a2ecad..0c151e5af079 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _METAG_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index fd850879854d..d506bb0893f9 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t; # else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { pmd_t pue[1]; } pud_t; -typedef struct { pud_t pge[1]; } pgd_t; +typedef struct { pud_t p4e[1]; } p4d_t; +typedef struct { p4d_t pge[1]; } pgd_t; # endif /* CONFIG_MMU */ # define pte_val(x) ((x).pte) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index d21f3da7bdb6..6f94bed571c4 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,6 +16,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include extern int temp_tlb_entry; diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 514cbc0a6a67..130a2a6c1531 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,6 +17,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) #include #else diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 3810a6f740fd..dfe730a5ede0 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -57,6 +57,7 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) +#define __ARCH_USE_5LEVEL_HACK #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 298393c3cb42..db4f7d179220 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -22,6 +22,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include #define FIRST_USER_ADDRESS 0UL diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 3567aa7be555..ff97374ca069 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -25,6 +25,7 @@ #ifndef __ASM_OPENRISC_PGTABLE_H #define __ASM_OPENRISC_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 012223638815..26ed228d4dc6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c7015..13c39b6d5d64 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,9 +1,12 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#include + #ifndef __ASSEMBLY__ #include #endif + /* * Common bits between hash and Radix page table */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ba9921bf202e..5134ade2e850 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index d0db98793dd8..9f4de0a1035e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H + +#include + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 55b28ef3409a..1facb584dd29 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define __ARCH_USE_5LEVEL_HACK #include diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ed1972b1920..93e37b12e882 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,6 +24,7 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ +#include #include #include #include diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index 0553e5cd5985..46ff8fd678a7 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_SCORE_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h index 19bd89db17e7..f75cf4387257 100644 --- a/arch/sh/include/asm/pgtable-2level.h +++ b/arch/sh/include/asm/pgtable-2level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_2LEVEL_H #define __ASM_SH_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 249a985d9648..9b1e776eca31 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_3LEVEL_H #define __ASM_SH_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 56e49c8f770d..8a598528ec1f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -12,6 +12,7 @@ * the SpitFire page tables. */ +#include #include #include #include diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index d26a42279036..5f8c615cb5e9 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; #define MAXMEM (_VMALLOC_START - PAGE_OFFSET) /* We have no pmd or pud since we are strictly a two-level page table */ +#define __ARCH_USE_5LEVEL_HACK #include static inline int pud_huge_page(pud_t pud) { return 0; } diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index e96cec52f6d8..96fe58b45118 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -59,6 +59,7 @@ #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index cfbe59752469..179c0ea87a0c 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index bae8523a162f..c4d876dfb9ac 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -7,6 +7,7 @@ #ifndef __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 818d0f5598e3..a4f2bef37e70 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -12,6 +12,7 @@ #ifndef __UNICORE_PGTABLE_H__ #define __UNICORE_PGTABLE_H__ +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d6429..62484333673d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd) } #if CONFIG_PGTABLE_LEVELS > 3 +#include + typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) @@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else +#define __ARCH_USE_5LEVEL_HACK #include static inline pudval_t native_pud_val(pud_t pud) @@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else +#define __ARCH_USE_5LEVEL_HACK #include static inline pmdval_t native_pmd_val(pmd_t pmd) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8aa0e0d9cbb2..30dd5b2e4ad5 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_PGTABLE_H #define _XTENSA_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include #include -- cgit From 8bd49ac86677ca43d64f08c45864e438283d6a76 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Mar 2017 09:57:17 +0100 Subject: s390: wire up statx system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/unistd.h | 4 +++- arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/syscalls.S | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 4384bc797a54..152de9b796e1 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,9 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -#define NR_syscalls 378 +/* Number 378 is reserved for guarded storage */ +#define __NR_statx 379 +#define NR_syscalls 380 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index ae2cda5eee5a..e89cc2e71db1 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,3 +178,4 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9b59e6212d8f..2659b5cfeddb 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) +NI_SYSCALL +SYSCALL(sys_statx,compat_sys_statx) -- cgit From 8a1115ff6b6d90cf1066ec3a0c4e51276553eebe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Mar 2017 16:16:31 -0800 Subject: scripts/spelling.txt: add "disble(d)" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: disble||disable disbled||disabled I kept the TSL2563_INT_DISBLED in /drivers/iio/light/tsl2563.c untouched. The macro is not referenced at all, but this commit is touching only comment blocks just in case. Link: http://lkml.kernel.org/r/1481573103-11329-20-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- arch/x86/kernel/ftrace.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index ae6903d7fdbe..14970f11bbf2 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void) dma_in_cfg.en = regk_dma_no; REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg); - /* Disble the cryptocop. */ + /* Disable the cryptocop. */ rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg); rw_cfg.en = 0; REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8639bb2ae058..8f3d9cf26ff9 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -535,7 +535,7 @@ static void run_sync(void) { int enable_irqs = irqs_disabled(); - /* We may be called with interrupts disbled (on bootup). */ + /* We may be called with interrupts disabled (on bootup). */ if (enable_irqs) local_irq_enable(); on_each_cpu(do_sync_core, NULL, 1); -- cgit From 52c50ca75c534c0772b71900a29b3a71439b32ef Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Mar 2017 16:16:36 -0800 Subject: powerpc/mm: handle protnone ptes on fork We need to mark pages of parent process read only on fork. Numa fault pte needs a protnone ptes variant with saved write flag set. On fork we need to make sure we remove the saved write bit. Instead of adding the protnone check in the caller update ptep_set_wrprotect variants to clear savedwrite bit. Without this we see random segfaults in application on fork. Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write") Link: http://lkml.kernel.org/r/1488203787-17849-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Rik van Riel Cc: Mel Gorman Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgtable.h | 73 ++++++++++++++++------------ 1 file changed, 42 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c7015..f0b08acda5eb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -347,23 +347,53 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) +static inline int pte_write(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); +} + +#ifdef CONFIG_NUMA_BALANCING +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + /* + * Saved write ptes are prot none ptes that doesn't have + * privileged bit sit. We mark prot none as one which has + * present and pviliged bit set and RWX cleared. To mark + * protnone which used to have _PAGE_WRITE set we clear + * the privileged bit. + */ + return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); +} +#else +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + return false; +} +#endif + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + if (pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + /* + * We should not find protnone for hugetlb, but this complete the + * interface. + */ + if (pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -397,11 +427,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_update(mm, addr, ptep, ~0UL, 0, 0); } -static inline int pte_write(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); -} - static inline int pte_dirty(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY)); @@ -466,19 +491,6 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } -#define pte_savedwrite pte_savedwrite -static inline bool pte_savedwrite(pte_t pte) -{ - /* - * Saved write ptes are prot none ptes that doesn't have - * privileged bit sit. We mark prot none as one which has - * present and pviliged bit set and RWX cleared. To mark - * protnone which used to have _PAGE_WRITE set we clear - * the privileged bit. - */ - VM_BUG_ON(!pte_protnone(pte)); - return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); -} #endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) @@ -982,11 +994,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - - if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + if (pmd_write((*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + else if (unlikely(pmd_savedwrite(*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); } static inline int pmd_trans_huge(pmd_t pmd) -- cgit From d19469e8415813cceaa494b6f538e327b9a95f3b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Mar 2017 16:16:39 -0800 Subject: power/mm: update pte_write and pte_wrprotect to handle savedwrite We use pte_write() to check whethwer the pte entry is writable. This is mostly used to later mark the pte read only if it is writable. The other use of pte_write() is to check whether the pte_entry is writable so that hardware page table entry can be marked accordingly. This is used in kvm where we look at qemu page table entry and update hardware hash page table for the guest with correct write enable bit. With the above, for the first usage we should also check the savedwrite bit so that we can correctly clear the savedwite bit. For the later, we add a new variant __pte_write(). With this we can revert write_protect_page part of 595cd8f256d2 ("mm/ksm: handle protnone saved writes when making page write protect"). But I left it as it is as an example code for savedwrite check. Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write") Link: http://lkml.kernel.org/r/1488203787-17849-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Rik van Riel Cc: Mel Gorman Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgtable.h | 24 +++++++++++++++++++----- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index f0b08acda5eb..ec1e731e6a2d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -347,7 +347,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) -static inline int pte_write(pte_t pte) +static inline int __pte_write(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); } @@ -373,11 +373,16 @@ static inline bool pte_savedwrite(pte_t pte) } #endif +static inline int pte_write(pte_t pte) +{ + return __pte_write(pte) || pte_savedwrite(pte); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (pte_write(*ptep)) + if (__pte_write(*ptep)) pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); else if (unlikely(pte_savedwrite(*ptep))) pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); @@ -390,7 +395,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, * We should not find protnone for hugetlb, but this complete the * interface. */ - if (pte_write(*ptep)) + if (__pte_write(*ptep)) pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); else if (unlikely(pte_savedwrite(*ptep))) pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); @@ -490,7 +495,13 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) VM_BUG_ON(!pte_protnone(pte)); return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } - +#else +#define pte_clear_savedwrite pte_clear_savedwrite +static inline pte_t pte_clear_savedwrite(pte_t pte) +{ + VM_WARN_ON(1); + return __pte(pte_val(pte) & ~_PAGE_WRITE); +} #endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) @@ -518,6 +529,8 @@ static inline unsigned long pte_pfn(pte_t pte) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { + if (unlikely(pte_savedwrite(pte))) + return pte_clear_savedwrite(pte); return __pte(pte_val(pte) & ~_PAGE_WRITE); } @@ -938,6 +951,7 @@ static inline int pmd_protnone(pmd_t pmd) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -994,7 +1008,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (pmd_write((*pmdp))) + if (__pmd_write((*pmdp))) pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); else if (unlikely(pmd_savedwrite(*pmdp))) pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f3158fb16de3..8c68145ba1bd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); - if (pte_write(pte)) + if (__pte_write(pte)) write_ok = 1; } local_irq_restore(flags); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6fca970373ee..ce6f2121fffe 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte = kvmppc_read_update_linux_pte(ptep, writing); if (pte_present(pte) && !pte_protnone(pte)) { - if (writing && !pte_write(pte)) + if (writing && !__pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); is_ci = pte_ci(pte); -- cgit From ef947b2529f918d9606533eb9c32b187ed6a5ede Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:42 -0800 Subject: x86, mm: fix gup_pte_range() vs DAX mappings gup_pte_range() fails to check pte_allows_gup() before translating a DAX pte entry, pte_devmap(), to a page. This allows writes to read-only mappings, and bypasses the DAX cacheline dirty tracking due to missed 'mkwrite' faults. The gup_huge_pmd() path and the gup_huge_pud() path correctly check pte_allows_gup() before checking for _devmap() entries. Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Link: http://lkml.kernel.org/r/148804251312.36605.12665024794196605053.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams Reported-by: Dave Hansen Reported-by: Ross Zwisler Cc: Xiong Zhou Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 99c7805a9693..9d32ee608807 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -120,6 +120,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } + if (!pte_allows_gup(pte_val(pte), write)) { + pte_unmap(ptep); + return 0; + } + if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { @@ -127,8 +132,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, pte_unmap(ptep); return 0; } - } else if (!pte_allows_gup(pte_val(pte), write) || - pte_special(pte)) { + } else if (pte_special(pte)) { pte_unmap(ptep); return 0; } -- cgit From b2e593e271b0760ebc8999e5f9dd068ae2b9d30a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:45 -0800 Subject: x86, mm: unify exit paths in gup_pte_range() All exit paths from gup_pte_range() require pte_unmap() of the original pte page before returning. Refactor the code to have a single exit point to do the unmap. This mirrors the flow of the generic gup_pte_range() in mm/gup.c. Link: http://lkml.kernel.org/r/148804251828.36605.14910389618497006945.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Dave Hansen Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 9d32ee608807..1f3b6ef105cd 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -106,36 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; - int nr_start = *nr; - pte_t *ptep; + int nr_start = *nr, ret = 0; + pte_t *ptep, *ptem; - ptep = pte_offset_map(&pmd, addr); + /* + * Keep the original mapped PTE value (ptem) around since we + * might increment ptep off the end of the page when finishing + * our loop iteration. + */ + ptem = ptep = pte_offset_map(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) { - pte_unmap(ptep); - return 0; - } + if (pte_protnone(pte)) + break; - if (!pte_allows_gup(pte_val(pte), write)) { - pte_unmap(ptep); - return 0; - } + if (!pte_allows_gup(pte_val(pte), write)) + break; if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, pages); - pte_unmap(ptep); - return 0; + break; } - } else if (pte_special(pte)) { - pte_unmap(ptep); - return 0; - } + } else if (pte_special(pte)) + break; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); get_page(page); @@ -145,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); + if (addr == end) + ret = 1; + pte_unmap(ptem); - return 1; + return ret; } static inline void get_head_page_multiple(struct page *page, int nr) -- cgit From ca5b58ea3db88b5e69ba2a1f6bc3cf239cdcc64f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 9 Mar 2017 16:17:34 -0800 Subject: sh: cayman: IDE support fix Remove incorrect CONFIG_IDE ifdef (CONFIG_IDE config option is for internal drivers/ide/ use) and make IDE hardware interface always initialized (not only when IDE subsystem is built-in). This patch allows Cayman board to work with modular IDE subsystem support and removes the requirement of having the whole core IDE subsystem built-in when using libata PATA support. Link: http://lkml.kernel.org/r/1990884.yFoE6lSB9G@amdc3058 Signed-off-by: Bartlomiej Zolnierkiewicz Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/boards/mach-cayman/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c index 340fd40b381d..9c292c27e0d7 100644 --- a/arch/sh/boards/mach-cayman/setup.c +++ b/arch/sh/boards/mach-cayman/setup.c @@ -128,7 +128,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX); SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX); -#ifdef CONFIG_IDE /* * Only IDE1 exists on the Cayman */ @@ -158,7 +157,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */ -#endif /* Exit the configuration state */ outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); -- cgit From bba8376aea1dcbbe22bbda118c52abee317c7609 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Thu, 9 Mar 2017 14:00:17 +0100 Subject: x86/reboot/quirks: Fix typo in ASUS EeeBook X205TA reboot quirk The reboot quirk for ASUS EeeBook X205TA contains a typo in DMI_PRODUCT_NAME, improperly referring to X205TAW instead of X205TA, which prevents the quirk from being triggered. The model X205TAW already has a reboot quirk of its own. This fix simply removes the inappropriate final letter W. Fixes: 90b28ded88dd ("x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk") Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1489064417-7445-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4194d6f9bb29..067f9813fd2c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -228,7 +228,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { .ident = "ASUS EeeBook X205TA", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"), }, }, { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ -- cgit From 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Mar 2017 13:17:18 +0100 Subject: kexec, x86/purgatory: Unbreak it and clean it up The purgatory code defines global variables which are referenced via a symbol lookup in the kexec code (core and arch). A recent commit addressing sparse warnings made these static and thereby broke kexec_file. Why did this happen? Simply because the whole machinery is undocumented and lacks any form of forward declarations. The variable names are unspecific and lack a prefix, so adding forward declarations creates shadow variables in the core code. Aside of that the code relies on magic constants and duplicate struct definitions with no way to ensure that these things stay in sync. The section placement of the purgatory variables happened by chance and not by design. Unbreak kexec and cleanup the mess: - Add proper forward declarations and document the usage - Use common struct definition - Use the proper common defines instead of magic constants - Add a purgatory_ prefix to have a proper name space - Use ARRAY_SIZE() instead of a homebrewn reimplementation - Add proper sections to the purgatory variables [ From Mike ] Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static") Reported-by: Mike Galbraith < Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire Cc: Borislav Petkov Cc: Vivek Goyal Cc: "Tobin C. Harding" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos Signed-off-by: Thomas Gleixner --- arch/powerpc/purgatory/trampoline.S | 12 ++++++------ arch/x86/include/asm/purgatory.h | 20 ++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 9 ++++++--- arch/x86/purgatory/purgatory.c | 35 +++++++++++++++++------------------ arch/x86/purgatory/purgatory.h | 8 -------- arch/x86/purgatory/setup-x86_64.S | 2 +- arch/x86/purgatory/sha256.h | 1 - 7 files changed, 50 insertions(+), 37 deletions(-) create mode 100644 arch/x86/include/asm/purgatory.h delete mode 100644 arch/x86/purgatory/purgatory.h (limited to 'arch') diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index f9760ccf4032..3696ea6c4826 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -116,13 +116,13 @@ dt_offset: .data .balign 8 -.globl sha256_digest -sha256_digest: +.globl purgatory_sha256_digest +purgatory_sha256_digest: .skip 32 - .size sha256_digest, . - sha256_digest + .size purgatory_sha256_digest, . - purgatory_sha256_digest .balign 8 -.globl sha_regions -sha_regions: +.globl purgatory_sha_regions +purgatory_sha_regions: .skip 8 * 2 * 16 - .size sha_regions, . - sha_regions + .size purgatory_sha_regions, . - purgatory_sha_regions diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h new file mode 100644 index 000000000000..d7da2729903d --- /dev/null +++ b/arch/x86/include/asm/purgatory.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_PURGATORY_H +#define _ASM_X86_PURGATORY_H + +#ifndef __ASSEMBLY__ +#include + +extern void purgatory(void); +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern unsigned long purgatory_backup_dest; +extern unsigned long purgatory_backup_src; +extern unsigned long purgatory_backup_sz; +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 307b1f4543de..857cdbd02867 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image) /* Setup copying of backup region */ if (image->type == KEXEC_TYPE_CRASH) { - ret = kexec_purgatory_get_set_symbol(image, "backup_dest", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_dest", &image->arch.backup_load_addr, sizeof(image->arch.backup_load_addr), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_src", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_src", &image->arch.backup_src_start, sizeof(image->arch.backup_src_start), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_sz", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_sz", &image->arch.backup_src_sz, sizeof(image->arch.backup_src_sz), 0); if (ret) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index b6d5c8946e66..470edad96bb9 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -10,22 +10,19 @@ * Version 2. See the file COPYING for more details. */ +#include +#include + #include "sha256.h" -#include "purgatory.h" #include "../boot/string.h" -struct sha_region { - unsigned long start; - unsigned long len; -}; - -static unsigned long backup_dest; -static unsigned long backup_src; -static unsigned long backup_sz; +unsigned long purgatory_backup_dest __section(.kexec-purgatory); +unsigned long purgatory_backup_src __section(.kexec-purgatory); +unsigned long purgatory_backup_sz __section(.kexec-purgatory); -static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory); -struct sha_region sha_regions[16] = {}; +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory); /* * On x86, second kernel requries first 640K of memory to boot. Copy @@ -34,26 +31,28 @@ struct sha_region sha_regions[16] = {}; */ static int copy_backup_region(void) { - if (backup_dest) - memcpy((void *)backup_dest, (void *)backup_src, backup_sz); - + if (purgatory_backup_dest) { + memcpy((void *)purgatory_backup_dest, + (void *)purgatory_backup_src, purgatory_backup_sz); + } return 0; } static int verify_sha256_digest(void) { - struct sha_region *ptr, *end; + struct kexec_sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; struct sha256_state sctx; sha256_init(&sctx); - end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])]; - for (ptr = sha_regions; ptr < end; ptr++) + end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); + + for (ptr = purgatory_sha_regions; ptr < end; ptr++) sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len); sha256_final(&sctx, digest); - if (memcmp(digest, sha256_digest, sizeof(digest))) + if (memcmp(digest, purgatory_sha256_digest, sizeof(digest))) return 1; return 0; diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h deleted file mode 100644 index e2e365a6c192..000000000000 --- a/arch/x86/purgatory/purgatory.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef PURGATORY_H -#define PURGATORY_H - -#ifndef __ASSEMBLY__ -extern void purgatory(void); -#endif /* __ASSEMBLY__ */ - -#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index f90e9dfa90bb..dfae9b9e60b5 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,7 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ -#include "purgatory.h" +#include .text .globl purgatory_start diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h index bd15a4127735..2867d9825a57 100644 --- a/arch/x86/purgatory/sha256.h +++ b/arch/x86/purgatory/sha256.h @@ -10,7 +10,6 @@ #ifndef SHA256_H #define SHA256_H - #include #include -- cgit From 0acf611997d9d05dbfb559c3c6e379c861eb5957 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Feb 2017 11:07:57 -0800 Subject: score: Fix implicit includes now failing build after extable change After changing from module.h to extable.h, score builds fail with: arch/score/kernel/traps.c: In function 'do_ri': arch/score/kernel/traps.c:248:4: error: implicit declaration of function 'user_disable_single_step' arch/score/mm/extable.c: In function 'fixup_exception': arch/score/mm/extable.c:32:38: error: dereferencing pointer to incomplete type arch/score/mm/extable.c:34:24: error: dereferencing pointer to incomplete type because extable.h doesn't drag in the same amount of headers as the module.h did. Add in the headers which were implicitly expected. Fixes: 90858794c960 ("module.h: remove extable.h include now users have migrated") Signed-off-by: Guenter Roeck [PG: tweak commit log; refresh for sched header refactoring.] Signed-off-by: Paul Gortmaker --- arch/score/kernel/traps.c | 1 + arch/score/mm/extable.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index e359ec675869..12daf45369b4 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,6 +24,7 @@ */ #include +#include #include #include #include diff --git a/arch/score/mm/extable.c b/arch/score/mm/extable.c index ec871355fc2d..6736a3ad6286 100644 --- a/arch/score/mm/extable.c +++ b/arch/score/mm/extable.c @@ -24,6 +24,8 @@ */ #include +#include +#include int fixup_exception(struct pt_regs *regs) { -- cgit From 2c4ea6e28dbf15ab93632c5c189f3948366b8885 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 01:31:19 +0100 Subject: x86/tlb: Fix tlb flushing when lguest clears PGE Fengguang reported random corruptions from various locations on x86-32 after commits d2852a224050 ("arch: add ARCH_HAS_SET_MEMORY config") and 9d876e79df6a ("bpf: fix unlocking of jited image when module ronx not set") that uses the former. While x86-32 doesn't have a JIT like x86_64, the bpf_prog_lock_ro() and bpf_prog_unlock_ro() got enabled due to ARCH_HAS_SET_MEMORY, whereas Fengguang's test kernel doesn't have module support built in and therefore never had the DEBUG_SET_MODULE_RONX setting enabled. After investigating the crashes further, it turned out that using set_memory_ro() and set_memory_rw() didn't have the desired effect, for example, setting the pages as read-only on x86-32 would still let probe_kernel_write() succeed without error. This behavior would manifest itself in situations where the vmalloc'ed buffer was accessed prior to set_memory_*() such as in case of bpf_prog_alloc(). In cases where it wasn't, the page attribute changes seemed to have taken effect, leading to the conclusion that a TLB invalidate didn't happen. Moreover, it turned out that this issue reproduced with qemu in "-cpu kvm64" mode, but not for "-cpu host". When the issue occurs, change_page_attr_set_clr() did trigger a TLB flush as expected via __flush_tlb_all() through cpa_flush_range(), though. There are 3 variants for issuing a TLB flush: invpcid_flush_all() (depends on CPU feature bits X86_FEATURE_INVPCID, X86_FEATURE_PGE), cr4 based flush (depends on X86_FEATURE_PGE), and cr3 based flush. For "-cpu host" case in my setup, the flush used invpcid_flush_all() variant, whereas for "-cpu kvm64", the flush was cr4 based. Switching the kvm64 case to cr3 manually worked fine, and further investigating the cr4 one turned out that X86_CR4_PGE bit was not set in cr4 register, meaning the __native_flush_tlb_global_irq_disabled() wrote cr4 twice with the same value instead of clearing X86_CR4_PGE in the first write to trigger the flush. It turned out that X86_CR4_PGE was cleared from cr4 during init from lguest_arch_host_init() via adjust_pge(). The X86_FEATURE_PGE bit is also cleared from there due to concerns of using PGE in guest kernel that can lead to hard to trace bugs (see bff672e630a0 ("lguest: documentation V: Host") in init()). The CPU feature bits are cleared in dynamic boot_cpu_data, but they never propagated to __flush_tlb_all() as it uses static_cpu_has() instead of boot_cpu_has() for testing which variant of TLB flushing to use, meaning they still used the old setting of the host kernel. Clearing via setup_clear_cpu_cap(X86_FEATURE_PGE) so this would propagate to static_cpu_has() checks is too late at this point as sections have been patched already, so for now, it seems reasonable to switch back to boot_cpu_has(X86_FEATURE_PGE) as it was prior to commit c109bf95992b ("x86/cpufeature: Remove cpu_has_pge"). This lets the TLB flush trigger via cr3 as originally intended, properly makes the new page attributes visible and thus fixes the crashes seen by Fengguang. Fixes: c109bf95992b ("x86/cpufeature: Remove cpu_has_pge") Reported-by: Fengguang Wu Signed-off-by: Daniel Borkmann Cc: bp@suse.de Cc: Kees Cook Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: Rusty Russell Cc: Alexei Starovoitov Cc: Linus Torvalds Cc: lkp@01.org Cc: Laura Abbott Cc: stable@vger.kernel.org Link: http://lkml.kernrl.org/r/20170301125426.l4nf65rx4wahohyl@wfg-t540p.sh.intel.com Link: http://lkml.kernel.org/r/25c41ad9eca164be4db9ad84f768965b7eb19d9e.1489191673.git.daniel@iogearbox.net Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..fc5abff9b7fd 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (static_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); -- cgit From f3fbd7ec62dec1528fb8044034e2885f2b257941 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:03:38 +0900 Subject: arm: kprobes: Allow to handle reentered kprobe on single-stepping This is arm port of commit 6a5022a56ac3 ("kprobes/x86: Allow to handle reentered kprobe on single-stepping") Since the FIQ handlers can interrupt in the single stepping (or preparing the single stepping, do_debug etc.), we should consider a kprobe is hit in the NMI handler. Even in that case, the kprobe is allowed to be reentered as same as the kprobes hit in kprobe handlers (KPROBE_HIT_ACTIVE or KPROBE_HIT_SSDONE). The real issue will happen when a kprobe hit while another reentered kprobe is processing (KPROBE_REENTER), because we already consumed a saved-area for the previous kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index b6dc9d838a9a..35148b46c4f5 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -271,6 +271,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) switch (kcb->kprobe_status) { case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: + case KPROBE_HIT_SS: /* A pre- or post-handler probe got us here. */ kprobes_inc_nmissed_count(p); save_previous_kprobe(kcb); @@ -279,6 +280,11 @@ void __kprobes kprobe_handler(struct pt_regs *regs) singlestep(p, regs, kcb); restore_previous_kprobe(kcb); break; + case KPROBE_REENTER: + /* A nested probe was hit in FIQ, it is a BUG */ + pr_warn("Unrecoverable kprobe detected at %p.\n", + p->addr); + /* fall through */ default: /* impossible cases */ BUG(); -- cgit From 91fc862c613ab7a0ef6b0b7755c33619127f4e5a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:04:48 +0900 Subject: arm: kprobes: Skip single-stepping in recursing path if possible Kprobes/arm skips single-stepping (moreover handling the event) if the conditional instruction must not be executed. This also apply the rule when we hit the recursing kprobe, so that kprobe does not count nmissed up in that case. Signed-off-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 35148b46c4f5..269f66e66ff5 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -266,7 +266,15 @@ void __kprobes kprobe_handler(struct pt_regs *regs) #endif if (p) { - if (cur) { + if (!p->ainsn.insn_check_cc(regs->ARM_cpsr)) { + /* + * Probe hit but conditional execution check failed, + * so just skip the instruction and continue as if + * nothing had happened. + * In this case, we can skip recursing check too. + */ + singlestep_skip(p, regs); + } else if (cur) { /* Kprobe is pending, so we're recursing. */ switch (kcb->kprobe_status) { case KPROBE_HIT_ACTIVE: @@ -289,7 +297,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) /* impossible cases */ BUG(); } - } else if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) { + } else { /* Probe hit and conditional execution check ok. */ set_current_kprobe(p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -310,13 +318,6 @@ void __kprobes kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); } - } else { - /* - * Probe hit but conditional execution check failed, - * so just skip the instruction and continue as if - * nothing had happened. - */ - singlestep_skip(p, regs); } } else if (cur) { /* We probably hit a jprobe. Call its break handler. */ -- cgit From 06553175f585b52509c7df37d6f4a50aacb7b211 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:05:59 +0900 Subject: arm: kprobes: Fix the return address of multiple kretprobes This is arm port of commit 737480a0d525 ("kprobes/x86: Fix the return address of multiple kretprobes"). Fix the return address of subsequent kretprobes when multiple kretprobes are set on the same function. For example: # cd /sys/kernel/debug/tracing # echo "r:event1 sys_symlink" > kprobe_events # echo "r:event2 sys_symlink" >> kprobe_events # echo 1 > events/kprobes/enable # ln -s /tmp/foo /tmp/bar (without this patch) # cat trace | grep -v ^# ln-82 [000] dn.2 68.446525: event1: (kretprobe_trampoline+0x0/0x18 <- SyS_symlink) ln-82 [000] dn.2 68.447831: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) (with this patch) # cat trace | grep -v ^# ln-81 [000] dn.1 39.463469: event1: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) ln-81 [000] dn.1 39.464701: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) Signed-off-by: Masami Hiramatsu Cc: KUMANO Syuhei Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 269f66e66ff5..ad1f4e6a9e33 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -441,6 +441,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -463,14 +464,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); __this_cpu_write(current_kprobe, NULL); } - orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) @@ -482,7 +503,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) break; } - kretprobe_assert(ri, orig_ret_address, trampoline_address); kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { -- cgit From 974310d047f3c7788a51d10c8d255eebdb1fa857 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 2 Mar 2017 13:04:09 +0000 Subject: arm: kprobes: Align stack to 8-bytes in test code kprobes test cases need to have a stack that is aligned to an 8-byte boundary because they call other functions (and the ARM ABI mandates that alignment) and because test cases include 64-bit accesses to the stack. Unfortunately, GCC doesn't ensure this alignment for inline assembler and for the code in question seems to always misalign it by pushing just the LR register onto the stack. We therefore need to explicitly perform stack alignment at the start of each test case. Without this fix, some test cases will generate alignment faults on systems where alignment is enforced. Even if the kernel is configured to handle these faults in software, triggering them is ugly. It also exposes limitations in the fault handling code which doesn't cope with writes to the stack. E.g. when handling this instruction strd r6, [sp, #-64]! the fault handling code will write to a stack location below the SP value at the point the fault occurred, which coincides with where the exception handler has pushed the saved register context. This results in corruption of those registers. Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/test-core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index c893726aa52d..1c98a87786ca 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -977,7 +977,10 @@ static void coverage_end(void) void __naked __kprobes_test_case_start(void) { __asm__ __volatile__ ( - "stmdb sp!, {r4-r11} \n\t" + "mov r2, sp \n\t" + "bic r3, r2, #7 \n\t" + "mov sp, r3 \n\t" + "stmdb sp!, {r2-r11} \n\t" "sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" "bic r0, lr, #1 @ r0 = inline data \n\t" "mov r1, sp \n\t" @@ -997,7 +1000,8 @@ void __naked __kprobes_test_case_end_32(void) "movne pc, r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "mov pc, r0 \n\t" ); } @@ -1013,7 +1017,8 @@ void __naked __kprobes_test_case_end_16(void) "bxne r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "bx r0 \n\t" ); } -- cgit