From a545cf032d11437ed86e62f00d499108d91cae54 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Tue, 12 Jun 2018 01:59:04 +0530 Subject: ocxl: Change return type for fault handler Use new return type vm_fault_t for fault handler. For now, this is just documenting that the function returns a VM_FAULT value rather than an errno. Once all instances are converted, vm_fault_t will become a distinct type. Ref-> commit 1c8f422059ae ("mm: change return type to vm_fault_t") There is an existing bug when vm_insert_pfn() can return ENOMEM which was ignored and VM_FAULT_NOPAGE returned as default. The new inline vmf_insert_pfn() has removed this inefficiency by returning correct vm_fault_ type. Signed-off-by: Souptick Joarder Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/ocxl/context.c | 22 +++++++++++----------- drivers/misc/ocxl/sysfs.c | 5 ++--- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index 95f74623113e..c10a940e3b38 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -86,7 +86,7 @@ out: return rc; } -static int map_afu_irq(struct vm_area_struct *vma, unsigned long address, +static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address, u64 offset, struct ocxl_context *ctx) { u64 trigger_addr; @@ -95,15 +95,15 @@ static int map_afu_irq(struct vm_area_struct *vma, unsigned long address, if (!trigger_addr) return VM_FAULT_SIGBUS; - vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); } -static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address, +static vm_fault_t map_pp_mmio(struct vm_area_struct *vma, unsigned long address, u64 offset, struct ocxl_context *ctx) { u64 pp_mmio_addr; int pasid_off; + vm_fault_t ret; if (offset >= ctx->afu->config.pp_mmio_stride) return VM_FAULT_SIGBUS; @@ -121,27 +121,27 @@ static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address, pasid_off * ctx->afu->config.pp_mmio_stride + offset; - vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT); + ret = vmf_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT); mutex_unlock(&ctx->status_mutex); - return VM_FAULT_NOPAGE; + return ret; } -static int ocxl_mmap_fault(struct vm_fault *vmf) +static vm_fault_t ocxl_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct ocxl_context *ctx = vma->vm_file->private_data; u64 offset; - int rc; + vm_fault_t ret; offset = vmf->pgoff << PAGE_SHIFT; pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__, ctx->pasid, vmf->address, offset); if (offset < ctx->afu->irq_base_offset) - rc = map_pp_mmio(vma, vmf->address, offset, ctx); + ret = map_pp_mmio(vma, vmf->address, offset, ctx); else - rc = map_afu_irq(vma, vmf->address, offset, ctx); - return rc; + ret = map_afu_irq(vma, vmf->address, offset, ctx); + return ret; } static const struct vm_operations_struct ocxl_vmops = { diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c index d9753a1db14b..0ab1fd1b2682 100644 --- a/drivers/misc/ocxl/sysfs.c +++ b/drivers/misc/ocxl/sysfs.c @@ -64,7 +64,7 @@ static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj, return count; } -static int global_mmio_fault(struct vm_fault *vmf) +static vm_fault_t global_mmio_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct ocxl_afu *afu = vma->vm_private_data; @@ -75,8 +75,7 @@ static int global_mmio_fault(struct vm_fault *vmf) offset = vmf->pgoff; offset += (afu->global_mmio_start >> PAGE_SHIFT); - vm_insert_pfn(vma, vmf->address, offset); - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, vmf->address, offset); } static const struct vm_operations_struct global_mmio_vmops = { -- cgit From ee8c446fed99ffdc29dedf7d2a8854f1ea9a1022 Mon Sep 17 00:00:00 2001 From: "Mauro S. M. Rodrigues" Date: Thu, 22 Mar 2018 23:10:52 -0300 Subject: powerpc/eeh: Avoid misleading message "EEH: no capable adapters found" Due to recent refactoring in EEH in: commit b9fde58db7e5 ("powerpc/powernv: Rework EEH initialization on powernv") a misleading message was seen in the kernel message buffer: [ 0.108431] EEH: PowerNV platform initialized [ 0.589979] EEH: No capable adapters found This happened due to the removal of the initialization delay for powernv platform. Even though the EEH infrastructure for the devices is eventually initialized and still works just fine the eeh device probe step is postponed in order to assure the PEs are created. Later pnv_eeh_post_init does the probe devices job but at that point the message was already shown right after eeh_init flow. This patch introduces a new flag EEH_POSTPONED_PROBE to represent that temporary state and avoid the message mentioned above and showing the follow one instead: [ 0.107724] EEH: PowerNV platform initialized [ 4.844825] EEH: PCI Enhanced I/O Error Handling Enabled Signed-off-by: Mauro S. M. Rodrigues Acked-by: Russell Currey Tested-by:Venkat Rao B Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 15 ++++++++------- arch/powerpc/kernel/eeh.c | 2 +- arch/powerpc/platforms/powernv/eeh-powernv.c | 12 +++++++++++- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 677102baf3cd..219637ea69a1 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -36,13 +36,14 @@ struct pci_dn; #ifdef CONFIG_EEH /* EEH subsystem flags */ -#define EEH_ENABLED 0x01 /* EEH enabled */ -#define EEH_FORCE_DISABLED 0x02 /* EEH disabled */ -#define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ -#define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ -#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */ -#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */ -#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */ +#define EEH_ENABLED 0x01 /* EEH enabled */ +#define EEH_FORCE_DISABLED 0x02 /* EEH disabled */ +#define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ +#define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ +#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */ +#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */ +#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */ +#define EEH_POSTPONED_PROBE 0x80 /* Powernv may postpone device probe */ /* * Delay for PE reset, all in ms diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 5746809cfaad..6ebba3e48b01 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1087,7 +1087,7 @@ static int eeh_init(void) if (eeh_enabled()) pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); - else + else if (!eeh_has_flag(EEH_POSTPONED_PROBE)) pr_info("EEH: No capable adapters found\n"); return ret; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ddfc3544d285..3c1beae29f2d 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -223,6 +223,14 @@ int pnv_eeh_post_init(void) eeh_probe_devices(); eeh_addr_cache_build(); + if (eeh_has_flag(EEH_POSTPONED_PROBE)) { + eeh_clear_flag(EEH_POSTPONED_PROBE); + if (eeh_enabled()) + pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); + else + pr_info("EEH: No capable adapters found\n"); + } + /* Register OPAL event notifier */ eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); if (eeh_event_irq < 0) { @@ -384,8 +392,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) return NULL; /* Skip if we haven't probed yet */ - if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) + if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) { + eeh_add_flag(EEH_POSTPONED_PROBE); return NULL; + } /* Initialize eeh device */ edev->class_code = pdn->class_code; -- cgit From f6bd74fa084eb9ad573ffbb236a095454163f66d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Jun 2018 11:56:24 +0200 Subject: powerpc: xmon: use ktime_get_coarse_boottime64 get_monotonic_boottime() is deprecated, and may not be safe to call in every context, as it has to read a hardware clocksource. This changes xmon to print the time using ktime_get_coarse_boottime64() instead, which avoids the old timespec type and the HW access. Signed-off-by: Arnd Bergmann Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 47166ad2a669..45e3d0ec1246 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -918,13 +918,13 @@ static void remove_cpu_bpts(void) static void show_uptime(void) { - struct timespec uptime; + struct timespec64 uptime; if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); - get_monotonic_boottime(&uptime); + ktime_get_coarse_boottime_ts64(&uptime); printf("Uptime: %lu.%.2lu seconds\n", (unsigned long)uptime.tv_sec, ((unsigned long)uptime.tv_nsec / (NSEC_PER_SEC/100))); -- cgit From 8272f598523d0975259080d2f9d760b9ce625e18 Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Sun, 17 Jun 2018 14:49:06 +0200 Subject: powerpc: wii: Remove outdated comment about memory fixups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The workaround has been removed. What stays is just code to find the memory hole so the BATs can be configured properly in the function below. Fixes: 57deb8fea01f ("powerpc/wii: Don't rely on the reserved memory hack") Signed-off-by: Jonathan Neuschäfer Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/embedded6xx/wii.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 8bb46dcbebd8..403523c061ba 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -67,16 +67,6 @@ void __init wii_memory_fixups(void) { struct memblock_region *p = memblock.memory.regions; - /* - * This is part of a workaround to allow the use of two - * discontinuous RAM ranges on the Wii, even if this is - * currently unsupported on 32-bit PowerPC Linux. - * - * We coalesce the two memory ranges of the Wii into a - * single range, then create a reservation for the "hole" - * between both ranges. - */ - BUG_ON(memblock.memory.cnt != 2); BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); -- cgit From 8449a4cb69ab6fbb873d653a82787a2ae6f352aa Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 28 Nov 2017 17:29:34 -0200 Subject: scripts: Add ppc64le support for checkstack.pl 64-bit ELF v2 ABI specification for POWER describes, on section "General Stack Frame Requirements", that the stack should use the following instructions when compiled with backchain: mflr r0 std r0, 16(r1) stdu r1, -XX(r1) Where XX is the frame size for that function, and this is the value checkstack.pl will find the stack size for each function. This patch also simplifies the entire Powerpc section, since just two type of instructions are used, 'stdu' for 64 bits and 'stwu' for 32 bits platform. Signed-off-by: Breno Leitao Signed-off-by: Michael Ellerman --- scripts/checkstack.pl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index cbdf0dfd4c22..8081b6cf67d2 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -15,6 +15,7 @@ # M68k port by Geert Uytterhoeven and Andreas Schwab # AArch64, PARISC ports by Kyle McMartin # sparc port by Martin Habets +# ppc64le port by Breno Leitao # # Usage: # objdump -d vmlinux | scripts/checkstack.pl [arch] @@ -78,13 +79,9 @@ my (@stack, $re, $dre, $x, $xs, $funcre); $re = qr/.*l\.addi.*r1,r1,-(([0-9]{2}|[3-9])[0-9]{2})/o; } elsif ($arch eq 'parisc' || $arch eq 'parisc64') { $re = qr/.*ldo ($x{1,8})\(sp\),sp/o; - } elsif ($arch eq 'ppc') { - #c00029f4: 94 21 ff 30 stwu r1,-208(r1) - $re = qr/.*stwu.*r1,-($x{1,8})\(r1\)/o; - } elsif ($arch eq 'ppc64') { - #XXX - $re = qr/.*stdu.*r1,-($x{1,8})\(r1\)/o; - } elsif ($arch eq 'powerpc') { + } elsif ($arch eq 'powerpc' || $arch =~ /^ppc(64)?(le)?$/ ) { + # powerpc : 94 21 ff 30 stwu r1,-208(r1) + # ppc64(le) : 81 ff 21 f8 stdu r1,-128(r1) $re = qr/.*st[dw]u.*r1,-($x{1,8})\(r1\)/o; } elsif ($arch =~ /^s390x?$/) { # 11160: a7 fb ff 60 aghi %r15,-160 -- cgit From 3bfb450ee7b52d41ce2117738ee9c155f3c75237 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 18 Jun 2018 19:15:02 -0300 Subject: powerpc/pci: Remove legacy debug code Commit 59f47eff03a0 ("powerpc/pci: Use of_irq_parse_and_map_pci() helper") removed the 'oirq' variable, but kept memsetting it when the DEBUG macro is defined. When setting DEBUG macro for debugging purpose, the kernel fails to build since 'oirq' is not defined anymore. This patch simply remove the debug block, since it does not seem to sense now. Fixes: 59f47eff03a08c ("powerpc/pci: Use of_irq_parse_and_map_pci() helper") Signed-off-by: Breno Leitao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index fe9733ffffaa..f9352167e619 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -366,9 +366,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); -#ifdef DEBUG - memset(&oirq, 0xff, sizeof(oirq)); -#endif /* Try to get a mapping from the device-tree */ virq = of_irq_parse_and_map_pci(pci_dev, 0, 0); if (virq <= 0) { -- cgit From c6bef2e9e50ca27987aae90147511bd320a2264e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 25 Jun 2018 11:34:55 +1000 Subject: powerpc: Document issues with the DAWR on POWER9 Signed-off-by: Michael Neuling Acked-by: Stewart Smith Signed-off-by: Michael Ellerman --- Documentation/powerpc/DAWR-POWER9.txt | 58 +++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Documentation/powerpc/DAWR-POWER9.txt diff --git a/Documentation/powerpc/DAWR-POWER9.txt b/Documentation/powerpc/DAWR-POWER9.txt new file mode 100644 index 000000000000..2feaa6619658 --- /dev/null +++ b/Documentation/powerpc/DAWR-POWER9.txt @@ -0,0 +1,58 @@ +DAWR issues on POWER9 +============================ + +On POWER9 the DAWR can cause a checkstop if it points to cache +inhibited (CI) memory. Currently Linux has no way to disinguish CI +memory when configuring the DAWR, so (for now) the DAWR is disabled by +this commit: + + commit 9654153158d3e0684a1bdb76dbababdb7111d5a0 + Author: Michael Neuling + Date: Tue Mar 27 15:37:24 2018 +1100 + powerpc: Disable DAWR in the base POWER9 CPU features + +Technical Details: +============================ + +DAWR has 6 different ways of being set. +1) ptrace +2) h_set_mode(DAWR) +3) h_set_dabr() +4) kvmppc_set_one_reg() +5) xmon + +For ptrace, we now advertise zero breakpoints on POWER9 via the +PPC_PTRACE_GETHWDBGINFO call. This results in GDB falling back to +software emulation of the watchpoint (which is slow). + +h_set_mode(DAWR) and h_set_dabr() will now return an error to the +guest on a POWER9 host. Current Linux guests ignore this error, so +they will silently not get the DAWR. + +kvmppc_set_one_reg() will store the value in the vcpu but won't +actually set it on POWER9 hardware. This is done so we don't break +migration from POWER8 to POWER9, at the cost of silently losing the +DAWR on the migration. + +For xmon, the 'bd' command will return an error on P9. + +Consequences for users +============================ + +For GDB watchpoints (ie 'watch' command) on POWER9 bare metal , GDB +will accept the command. Unfortunately since there is no hardware +support for the watchpoint, GDB will software emulate the watchpoint +making it run very slowly. + +The same will also be true for any guests started on a POWER9 +host. The watchpoint will fail and GDB will fall back to software +emulation. + +If a guest is started on a POWER8 host, GDB will accept the watchpoint +and configure the hardware to use the DAWR. This will run at full +speed since it can use the hardware emulation. Unfortunately if this +guest is migrated to a POWER9 host, the watchpoint will be lost on the +POWER9. Loads and stores to the watchpoint locations will not be +trapped in GDB. The watchpoint is remembered, so if the guest is +migrated back to the POWER8 host, it will start working again. + -- cgit From aad15ccc4c043280624ebd345d311dc7b6398bba Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 25 Jun 2018 11:34:56 +1000 Subject: powerpc: Document issues with TM on POWER9 Signed-off-by: Michael Neuling Acked-by: Stewart Smith Signed-off-by: Michael Ellerman --- Documentation/powerpc/transactional_memory.txt | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt index e32fdbb4c9a7..52c023e14f26 100644 --- a/Documentation/powerpc/transactional_memory.txt +++ b/Documentation/powerpc/transactional_memory.txt @@ -198,3 +198,47 @@ presented). The transaction cannot then be continued and will take the failure handler route. Furthermore, the transactional 2nd register state will be inaccessible. GDB can currently be used on programs using TM, but not sensibly in parts within transactions. + +POWER9 +====== + +TM on POWER9 has issues with storing the complete register state. This +is described in this commit: + + commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7 + Author: Paul Mackerras + Date: Wed Mar 21 21:32:01 2018 +1100 + KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9 + +To account for this different POWER9 chips have TM enabled in +different ways. + +On POWER9N DD2.01 and below, TM is disabled. ie +HWCAP2[PPC_FEATURE2_HTM] is not set. + +On POWER9N DD2.1 TM is configured by firmware to always abort a +transaction when tm suspend occurs. So tsuspend will cause a +transaction to be aborted and rolled back. Kernel exceptions will also +cause the transaction to be aborted and rolled back and the exception +will not occur. If userspace constructs a sigcontext that enables TM +suspend, the sigcontext will be rejected by the kernel. This mode is +advertised to users with HWCAP2[PPC_FEATURE2_HTM_NO_SUSPEND] set. +HWCAP2[PPC_FEATURE2_HTM] is not set in this mode. + +On POWER9N DD2.2 and above, KVM and POWERVM emulate TM for guests (as +described in commit 4bb3c7a0208f), hence TM is enabled for guests +ie. HWCAP2[PPC_FEATURE2_HTM] is set for guest userspace. Guests that +makes heavy use of TM suspend (tsuspend or kernel suspend) will result +in traps into the hypervisor and hence will suffer a performance +degradation. Host userspace has TM disabled +ie. HWCAP2[PPC_FEATURE2_HTM] is not set. (although we make enable it +at some point in the future if we bring the emulation into host +userspace context switching). + +POWER9C DD1.2 and above are only available with POWERVM and hence +Linux only runs as a guest. On these systems TM is emulated like on +POWER9N DD2.2. + +Guest migration from POWER8 to POWER9 will work with POWER9N DD2.2 and +POWER9C DD1.2. Since earlier POWER9 processors don't support TM +emulation, migration from POWER8 to POWER9 is not supported there. -- cgit From d3d4ffaae439981e1e441ebb125aa3588627c5d8 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 1 Jun 2018 18:06:16 +1000 Subject: powerpc/powernv/ioda2: Reduce upper limit for DMA window size We use PHB in mode1 which uses bit 59 to select a correct DMA window. However there is mode2 which uses bits 59:55 and allows up to 32 DMA windows per a PE. Even though documentation does not clearly specify that, it seems that the actual hardware does not support bits 59:55 even in mode1, in other words we can create a window as big as 1<<58 but DMA simply won't work. This reduces the upper limit from 59 to 55 bits to let the userspace know about the hardware limits. Fixes: 7aafac11e3 "powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested" Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5bd0eb6681bc..ab678177d36e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2840,7 +2840,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); - if ((level_shift - 3) * levels + page_shift >= 60) + if ((level_shift - 3) * levels + page_shift >= 55) return -EINVAL; /* Allocate TCE table */ -- cgit From 7dea6f2f053599d90f7894216db0dd0bedeb3a1c Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 29 Jun 2018 10:12:09 +1000 Subject: powerpc/powernv/memtrace: Remove memtrace mmap() debugfs doesn't support mmap(), so this code is never used. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/memtrace.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index b99283df8584..f73101119eba 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -47,38 +47,9 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); } -static bool valid_memtrace_range(struct memtrace_entry *dev, - unsigned long start, unsigned long size) -{ - if ((start >= dev->start) && - ((start + size) <= (dev->start + dev->size))) - return true; - - return false; -} - -static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) -{ - unsigned long size = vma->vm_end - vma->vm_start; - struct memtrace_entry *dev = filp->private_data; - - if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size)) - return -EINVAL; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (remap_pfn_range(vma, vma->vm_start, - vma->vm_pgoff + (dev->start >> PAGE_SHIFT), - size, vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} - static const struct file_operations memtrace_fops = { .llseek = default_llseek, .read = memtrace_read, - .mmap = memtrace_mmap, .open = simple_open, }; -- cgit From c5828150067c47a97f30e690a472e0548d3ac97d Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 28 Jun 2018 12:05:00 +0200 Subject: Revert "cxl: Add kernel API to allow a context to operate with relocate disabled" Remove abandonned capi support for the Mellanox CX4. The symbol 'cxl_set_translation_mode' is never called, so ctx->real_mode is always false. This reverts commit 7a0d85d313c2066712e530e668bc02bb741a685c. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 19 ------------------- drivers/misc/cxl/cxl.h | 1 - drivers/misc/cxl/guest.c | 3 --- drivers/misc/cxl/native.c | 3 ++- include/misc/cxl.h | 8 -------- 5 files changed, 2 insertions(+), 32 deletions(-) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 753b1a698fc4..21d620e29fea 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -324,7 +324,6 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, if (task) { ctx->pid = get_task_pid(task, PIDTYPE_PID); kernel = false; - ctx->real_mode = false; /* acquire a reference to the task's mm */ ctx->mm = get_task_mm(current); @@ -388,24 +387,6 @@ void cxl_set_master(struct cxl_context *ctx) } EXPORT_SYMBOL_GPL(cxl_set_master); -int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode) -{ - if (ctx->status == STARTED) { - /* - * We could potentially update the PE and issue an update LLCMD - * to support this, but it doesn't seem to have a good use case - * since it's trivial to just create a second kernel context - * with different translation modes, so until someone convinces - * me otherwise: - */ - return -EBUSY; - } - - ctx->real_mode = real_mode; - return 0; -} -EXPORT_SYMBOL_GPL(cxl_set_translation_mode); - /* wrappers around afu_* file ops which are EXPORTED */ int cxl_fd_open(struct inode *inode, struct file *file) { diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 918d4fb742d1..af8794719956 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -613,7 +613,6 @@ struct cxl_context { bool pe_inserted; bool master; bool kernel; - bool real_mode; bool pending_irq; bool pending_fault; bool pending_afu_err; diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 4644f16606a3..f5dc740fcd13 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -623,9 +623,6 @@ static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u { pr_devel("in %s\n", __func__); - if (ctx->real_mode) - return -EPERM; - ctx->kernel = kernel; if (ctx->afu->current_mode == CXL_MODE_DIRECTED) return attach_afu_directed(ctx, wed, amr); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 98f867fcef24..c9d5d82dce8e 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -605,6 +605,7 @@ u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) sr |= CXL_PSL_SR_An_MP; if (mfspr(SPRN_LPCR) & LPCR_TC) sr |= CXL_PSL_SR_An_TC; + if (kernel) { if (!real_mode) sr |= CXL_PSL_SR_An_R; @@ -629,7 +630,7 @@ u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) static u64 calculate_sr(struct cxl_context *ctx) { - return cxl_calculate_sr(ctx->master, ctx->kernel, ctx->real_mode, + return cxl_calculate_sr(ctx->master, ctx->kernel, false, cxl_is_power9()); } diff --git a/include/misc/cxl.h b/include/misc/cxl.h index b712be544f8c..82cc6ffafe2d 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -173,14 +173,6 @@ int cxl_afu_reset(struct cxl_context *ctx); */ void cxl_set_master(struct cxl_context *ctx); -/* - * Sets the context to use real mode memory accesses to operate with - * translation disabled. Note that this only makes sense for kernel contexts - * under bare metal, and will not work with virtualisation. May only be - * performed on stopped contexts. - */ -int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode); - /* * Map and unmap the AFU Problem Space area. The amount and location mapped * depends on if this context is a master or slave. -- cgit From 0cfd7335d1ebea42cf113fd22452f6a10d3960fe Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 28 Jun 2018 12:05:01 +0200 Subject: Revert "cxl: Add support for interrupts on the Mellanox CX4" Remove abandonned capi support for the Mellanox CX4. This reverts commit a2f67d5ee8d950caaa7a6144cf0bfb256500b73e. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-cxl.c | 84 ------------------------------- arch/powerpc/platforms/powernv/pci-ioda.c | 4 -- arch/powerpc/platforms/powernv/pci.h | 2 - drivers/misc/cxl/api.c | 71 -------------------------- drivers/misc/cxl/base.c | 31 ------------ drivers/misc/cxl/cxl.h | 4 -- drivers/misc/cxl/main.c | 2 - include/misc/cxl-base.h | 4 -- 8 files changed, 202 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index cee003de63af..c447b7f03c09 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -8,7 +8,6 @@ */ #include -#include #include #include #include @@ -292,86 +291,3 @@ void pnv_cxl_disable_device(struct pci_dev *dev) cxl_pci_disable_device(dev); cxl_afu_put(afu); } - -/* - * This is a special version of pnv_setup_msi_irqs for cards in cxl mode. This - * function handles setting up the IVTE entries for the XSL to use. - * - * We are currently not filling out the MSIX table, since the only currently - * supported adapter (CX4) uses a custom MSIX table format in cxl mode and it - * is up to their driver to fill that out. In the future we may fill out the - * MSIX table (and change the IVTE entries to be an index to the MSIX table) - * for adapters implementing the Full MSI-X mode described in the CAIA. - */ -int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct msi_desc *entry; - struct cxl_context *ctx = NULL; - unsigned int virq; - int hwirq; - int afu_irq = 0; - int rc; - - if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) - return -ENODEV; - - if (pdev->no_64bit_msi && !phb->msi32_support) - return -ENODEV; - - rc = cxl_cx4_setup_msi_irqs(pdev, nvec, type); - if (rc) - return rc; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->msi_attrib.is_64 && !phb->msi32_support) { - pr_warn("%s: Supports only 64-bit MSIs\n", - pci_name(pdev)); - return -ENXIO; - } - - hwirq = cxl_next_msi_hwirq(pdev, &ctx, &afu_irq); - if (WARN_ON(hwirq <= 0)) - return (hwirq ? hwirq : -ENOMEM); - - virq = irq_create_mapping(NULL, hwirq); - if (!virq) { - pr_warn("%s: Failed to map cxl mode MSI to linux irq\n", - pci_name(pdev)); - return -ENOMEM; - } - - rc = pnv_cxl_ioda_msi_setup(pdev, hwirq, virq); - if (rc) { - pr_warn("%s: Failed to setup cxl mode MSI\n", pci_name(pdev)); - irq_dispose_mapping(virq); - return rc; - } - - irq_set_msi_desc(virq, entry); - } - - return 0; -} - -void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct msi_desc *entry; - irq_hw_number_t hwirq; - - if (WARN_ON(!phb)) - return; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->irq) - continue; - hwirq = virq_to_hw(entry->irq); - irq_set_msi_desc(entry->irq, NULL); - irq_dispose_mapping(entry->irq); - } - - cxl_cx4_teardown_msi_irqs(pdev); -} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ab678177d36e..5b819c55868d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3847,10 +3847,6 @@ static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, .dma_bus_setup = pnv_pci_dma_bus_setup, -#ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs, - .teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs, -#endif .enable_device_hook = pnv_cxl_enable_device_hook, .disable_device = pnv_cxl_disable_device, .release_device = pnv_pci_release_device, diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index eada4b6068cb..ba41913c7e21 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -265,8 +265,6 @@ extern int pnv_npu2_init(struct pnv_phb *phb); /* cxl functions */ extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev); extern void pnv_cxl_disable_device(struct pci_dev *dev); -extern int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); -extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); /* phb ops (cxl switches these when enabling the kernel api on the phb) */ diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 21d620e29fea..2e5862b7a074 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -595,73 +594,3 @@ int cxl_get_max_irqs_per_process(struct pci_dev *dev) return afu->irqs_max; } EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); - -/* - * This is a special interrupt allocation routine called from the PHB's MSI - * setup function. When capi interrupts are allocated in this manner they must - * still be associated with a running context, but since the MSI APIs have no - * way to specify this we use the default context associated with the device. - * - * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU - * interrupt number, so in order to overcome this their driver informs us of - * the restriction by setting the maximum interrupts per context, and we - * allocate additional contexts as necessary so that we can keep the AFU - * interrupt number within the supported range. - */ -int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - struct cxl_context *ctx, *new_ctx, *default_ctx; - int remaining; - int rc; - - ctx = default_ctx = cxl_get_context(pdev); - if (WARN_ON(!default_ctx)) - return -ENODEV; - - remaining = nvec; - while (remaining > 0) { - rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max)); - if (rc) { - pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev)); - return rc; - } - remaining -= ctx->afu->irqs_max; - - if (ctx != default_ctx && default_ctx->status == STARTED) { - WARN_ON(cxl_start_context(ctx, - be64_to_cpu(default_ctx->elem->common.wed), - NULL)); - } - - if (remaining > 0) { - new_ctx = cxl_dev_context_init(pdev); - if (IS_ERR(new_ctx)) { - pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev)); - return -ENOSPC; - } - list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts); - ctx = new_ctx; - } - } - - return 0; -} -/* Exported via cxl_base */ - -void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct cxl_context *ctx, *pos, *tmp; - - ctx = cxl_get_context(pdev); - if (WARN_ON(!ctx)) - return; - - cxl_free_afu_irqs(ctx); - list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) { - cxl_stop_context(pos); - cxl_free_afu_irqs(pos); - list_del(&pos->extra_irq_contexts); - cxl_release_context(pos); - } -} -/* Exported via cxl_base */ diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index cd54ce6f6230..fe90f895bb10 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -158,37 +158,6 @@ int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_ } EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); -int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - int ret; - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return false; - - ret = calls->cxl_cx4_setup_msi_irqs(pdev, nvec, type); - - cxl_calls_put(calls); - - return ret; -} -EXPORT_SYMBOL_GPL(cxl_cx4_setup_msi_irqs); - -void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return; - - calls->cxl_cx4_teardown_msi_irqs(pdev); - - cxl_calls_put(calls); -} -EXPORT_SYMBOL_GPL(cxl_cx4_teardown_msi_irqs); - static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index af8794719956..9688fe8b4d80 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -879,16 +879,12 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void _cxl_pci_disable_device(struct pci_dev *dev); int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); -int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); -void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); void (*cxl_pci_disable_device)(struct pci_dev *dev); int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); - int (*cxl_cx4_setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type); - void (*cxl_cx4_teardown_msi_irqs)(struct pci_dev *pdev); struct module *owner; }; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index c1ba0d42cbc8..59a904efd104 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -107,8 +107,6 @@ static struct cxl_calls cxl_calls = { .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, .cxl_pci_disable_device = _cxl_pci_disable_device, .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, - .cxl_cx4_setup_msi_irqs = _cxl_cx4_setup_msi_irqs, - .cxl_cx4_teardown_msi_irqs = _cxl_cx4_teardown_msi_irqs, .owner = THIS_MODULE, }; diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h index b2ebc91fe09a..bb7e629ae492 100644 --- a/include/misc/cxl-base.h +++ b/include/misc/cxl-base.h @@ -43,8 +43,6 @@ void cxl_afu_put(struct cxl_afu *afu); void cxl_slbia(struct mm_struct *mm); bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void cxl_pci_disable_device(struct pci_dev *dev); -int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); -void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); #else /* CONFIG_CXL_BASE */ @@ -54,8 +52,6 @@ static inline void cxl_afu_put(struct cxl_afu *afu) {} static inline void cxl_slbia(struct mm_struct *mm) {} static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; } static inline void cxl_pci_disable_device(struct pci_dev *dev) {} -static inline int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { return -ENODEV; } -static inline void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) {} #endif /* CONFIG_CXL_BASE */ -- cgit From 17d29039388807305ab02a4d6eae7cbe09f81f90 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 28 Jun 2018 12:05:02 +0200 Subject: Revert "cxl: Add preliminary workaround for CX4 interrupt limitation" Remove abandonned capi support for the Mellanox CX4. This reverts commit cbce0917e2e47d4bf5aa3b5fd6b1247f33e1a126. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 15 --------------- drivers/misc/cxl/base.c | 17 ----------------- drivers/misc/cxl/context.c | 1 - drivers/misc/cxl/cxl.h | 10 ---------- drivers/misc/cxl/main.c | 1 - include/misc/cxl.h | 20 -------------------- 6 files changed, 64 deletions(-) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 2e5862b7a074..34ba67bc41bd 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -181,21 +181,6 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } -int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) -{ - if (*ctx == NULL || *afu_irq == 0) { - *afu_irq = 1; - *ctx = cxl_get_context(pdev); - } else { - (*afu_irq)++; - if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) { - *ctx = list_next_entry(*ctx, extra_irq_contexts); - *afu_irq = 1; - } - } - return cxl_find_afu_irq(*ctx, *afu_irq); -} -/* Exported via cxl_base */ int cxl_set_priv(struct cxl_context *ctx, void *priv) { diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index fe90f895bb10..e1e80cb99ad9 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -141,23 +141,6 @@ void cxl_pci_disable_device(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(cxl_pci_disable_device); -int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) -{ - int ret; - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return -EBUSY; - - ret = calls->cxl_next_msi_hwirq(pdev, ctx, afu_irq); - - cxl_calls_put(calls); - - return ret; -} -EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); - static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index c6ec872800a2..0355d42d367f 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -74,7 +74,6 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) ctx->pending_afu_err = false; INIT_LIST_HEAD(&ctx->irq_names); - INIT_LIST_HEAD(&ctx->extra_irq_contexts); /* * When we have to destroy all contexts in cxl_context_detach_all() we diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 9688fe8b4d80..d95c2c98f2ab 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -623,14 +623,6 @@ struct cxl_context { struct rcu_head rcu; - /* - * Only used when more interrupts are allocated via - * pci_enable_msix_range than are supported in the default context, to - * use additional contexts to overcome the limitation. i.e. Mellanox - * CX4 only: - */ - struct list_head extra_irq_contexts; - struct mm_struct *mm; u16 tidr; @@ -878,13 +870,11 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, /* Internal functions wrapped in cxl_base to allow PHB to call them */ bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void _cxl_pci_disable_device(struct pci_dev *dev); -int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); void (*cxl_pci_disable_device)(struct pci_dev *dev); - int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); struct module *owner; }; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 59a904efd104..a7e83624034b 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -106,7 +106,6 @@ static struct cxl_calls cxl_calls = { .cxl_slbia = cxl_slbia_core, .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, .cxl_pci_disable_device = _cxl_pci_disable_device, - .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, .owner = THIS_MODULE, }; diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 82cc6ffafe2d..6a3711a2e217 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -183,26 +183,6 @@ void cxl_psa_unmap(void __iomem *addr); /* Get the process element for this context */ int cxl_process_element(struct cxl_context *ctx); -/* - * Limit the number of interrupts that a single context can allocate via - * cxl_start_work. If using the api with a real phb, this may be used to - * request that additional default contexts be created when allocating - * interrupts via pci_enable_msix_range. These will be set to the same running - * state as the default context, and if that is running it will reuse the - * parameters previously passed to cxl_start_context for the default context. - */ -int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs); -int cxl_get_max_irqs_per_process(struct pci_dev *dev); - -/* - * Use to simultaneously iterate over hardware interrupt numbers, contexts and - * afu interrupt numbers allocated for the device via pci_enable_msix_range and - * is a useful convenience function when working with hardware that has - * limitations on the number of interrupts per process. *ctx and *afu_irq - * should be NULL and 0 to start the iteration. - */ -int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); - /* * These calls allow drivers to create their own file descriptors and make them * identical to the cxl file descriptor user API. An example use case: -- cgit From 82c6ae67fbbef68c80c2a39d559dd649d7530cd6 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 28 Jun 2018 12:05:03 +0200 Subject: Revert "cxl: Add kernel APIs to get & set the max irqs per context" Remove abandonned capi support for the Mellanox CX4. This reverts commit 79384e4b71240abf50c375eea56060b0d79c242a. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 34ba67bc41bd..a535c1e6aa92 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -552,30 +552,3 @@ ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); } EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); - -int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs) -{ - struct cxl_afu *afu = cxl_pci_to_afu(dev); - if (IS_ERR(afu)) - return -ENODEV; - - if (irqs > afu->adapter->user_irqs) - return -EINVAL; - - /* Limit user_irqs to prevent the user increasing this via sysfs */ - afu->adapter->user_irqs = irqs; - afu->irqs_max = irqs; - - return 0; -} -EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process); - -int cxl_get_max_irqs_per_process(struct pci_dev *dev) -{ - struct cxl_afu *afu = cxl_pci_to_afu(dev); - if (IS_ERR(afu)) - return -ENODEV; - - return afu->irqs_max; -} -EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); -- cgit From 29fea8aa21a69418386e3e08fa546a0ba9bee96d Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 28 Jun 2018 12:05:04 +0200 Subject: Revert "cxl: Add cxl_check_and_switch_mode() API to switch bi-modal cards" Remove abandonned capi support for the Mellanox CX4. This reverts commit b0b5e5918ad1babfd1d43d98c7281926a7b57b9f. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/Kconfig | 8 -- drivers/misc/cxl/pci.c | 236 ++++------------------------------------------- include/misc/cxl.h | 25 ----- 3 files changed, 18 insertions(+), 251 deletions(-) diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 93397cb05b15..3ce933707828 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -33,11 +33,3 @@ config CXL CAPI adapters are found in POWER8 based systems. If unsure, say N. - -config CXL_BIMODAL - bool "Support for bi-modal CAPI cards" - depends on HOTPLUG_PCI_POWERNV = y && CXL || HOTPLUG_PCI_POWERNV = m && CXL = m - default y - help - Select this option to enable support for bi-modal CAPI cards, such as - the Mellanox CX-4. diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 429d6de1dde7..9c5a21fee835 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -55,8 +55,6 @@ pci_read_config_byte(dev, vsec + 0xa, dest) #define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \ pci_write_config_byte(dev, vsec + 0xa, val) -#define CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, vsec, val) \ - pci_bus_write_config_byte(bus, devfn, vsec + 0xa, val) #define CXL_VSEC_PROTOCOL_MASK 0xe0 #define CXL_VSEC_PROTOCOL_1024TB 0x80 #define CXL_VSEC_PROTOCOL_512TB 0x40 @@ -800,234 +798,36 @@ static int setup_cxl_bars(struct pci_dev *dev) return 0; } -#ifdef CONFIG_CXL_BIMODAL - -struct cxl_switch_work { - struct pci_dev *dev; - struct work_struct work; - int vsec; - int mode; -}; - -static void switch_card_to_cxl(struct work_struct *work) +/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */ +static int switch_card_to_cxl(struct pci_dev *dev) { - struct cxl_switch_work *switch_work = - container_of(work, struct cxl_switch_work, work); - struct pci_dev *dev = switch_work->dev; - struct pci_bus *bus = dev->bus; - struct pci_controller *hose = pci_bus_to_host(bus); - struct pci_dev *bridge; - struct pnv_php_slot *php_slot; - unsigned int devfn; + int vsec; u8 val; int rc; - dev_info(&bus->dev, "cxl: Preparing for mode switch...\n"); - bridge = list_first_entry_or_null(&hose->bus->devices, struct pci_dev, - bus_list); - if (!bridge) { - dev_WARN(&bus->dev, "cxl: Couldn't find root port!\n"); - goto err_dev_put; - } + dev_info(&dev->dev, "switch card to CXL\n"); - php_slot = pnv_php_find_slot(pci_device_to_OF_node(bridge)); - if (!php_slot) { - dev_err(&bus->dev, "cxl: Failed to find slot hotplug " - "information. You may need to upgrade " - "skiboot. Aborting.\n"); - goto err_dev_put; - } - - rc = CXL_READ_VSEC_MODE_CONTROL(dev, switch_work->vsec, &val); - if (rc) { - dev_err(&bus->dev, "cxl: Failed to read CAPI mode control: %i\n", rc); - goto err_dev_put; - } - devfn = dev->devfn; - - /* Release the reference obtained in cxl_check_and_switch_mode() */ - pci_dev_put(dev); - - dev_dbg(&bus->dev, "cxl: Removing PCI devices from kernel\n"); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bridge->subordinate); - pci_unlock_rescan_remove(); - - /* Switch the CXL protocol on the card */ - if (switch_work->mode == CXL_BIMODE_CXL) { - dev_info(&bus->dev, "cxl: Switching card to CXL mode\n"); - val &= ~CXL_VSEC_PROTOCOL_MASK; - val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; - rc = pnv_cxl_enable_phb_kernel_api(hose, true); - if (rc) { - dev_err(&bus->dev, "cxl: Failed to enable kernel API" - " on real PHB, aborting\n"); - goto err_free_work; - } - } else { - dev_WARN(&bus->dev, "cxl: Switching card to PCI mode not supported!\n"); - goto err_free_work; - } - - rc = CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, switch_work->vsec, val); - if (rc) { - dev_err(&bus->dev, "cxl: Failed to configure CXL protocol: %i\n", rc); - goto err_free_work; - } - - /* - * The CAIA spec (v1.1, Section 10.6 Bi-modal Device Support) states - * we must wait 100ms after this mode switch before touching PCIe config - * space. - */ - msleep(100); - - /* - * Hot reset to cause the card to come back in cxl mode. A - * OPAL_RESET_PCI_LINK would be sufficient, but currently lacks support - * in skiboot, so we use a hot reset instead. - * - * We call pci_set_pcie_reset_state() on the bridge, as a CAPI card is - * guaranteed to sit directly under the root port, and setting the reset - * state on a device directly under the root port is equivalent to doing - * it on the root port iself. - */ - dev_info(&bus->dev, "cxl: Configuration write complete, resetting card\n"); - pci_set_pcie_reset_state(bridge, pcie_hot_reset); - pci_set_pcie_reset_state(bridge, pcie_deassert_reset); - - dev_dbg(&bus->dev, "cxl: Offlining slot\n"); - rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_OFFLINE); - if (rc) { - dev_err(&bus->dev, "cxl: OPAL offlining call failed: %i\n", rc); - goto err_free_work; - } - - dev_dbg(&bus->dev, "cxl: Onlining and probing slot\n"); - rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_ONLINE); - if (rc) { - dev_err(&bus->dev, "cxl: OPAL onlining call failed: %i\n", rc); - goto err_free_work; - } - - pci_lock_rescan_remove(); - pci_hp_add_devices(bridge->subordinate); - pci_unlock_rescan_remove(); - - dev_info(&bus->dev, "cxl: CAPI mode switch completed\n"); - kfree(switch_work); - return; - -err_dev_put: - /* Release the reference obtained in cxl_check_and_switch_mode() */ - pci_dev_put(dev); -err_free_work: - kfree(switch_work); -} - -int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec) -{ - struct cxl_switch_work *work; - u8 val; - int rc; - - if (!cpu_has_feature(CPU_FTR_HVMODE)) + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); return -ENODEV; - - if (!vsec) { - vsec = find_cxl_vsec(dev); - if (!vsec) { - dev_info(&dev->dev, "CXL VSEC not found\n"); - return -ENODEV; - } } - rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val); - if (rc) { - dev_err(&dev->dev, "Failed to read current mode control: %i", rc); + if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) { + dev_err(&dev->dev, "failed to read current mode control: %i", rc); return rc; } - - if (mode == CXL_BIMODE_PCI) { - if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { - dev_info(&dev->dev, "Card is already in PCI mode\n"); - return 0; - } - /* - * TODO: Before it's safe to switch the card back to PCI mode - * we need to disable the CAPP and make sure any cachelines the - * card holds have been flushed out. Needs skiboot support. - */ - dev_WARN(&dev->dev, "CXL mode switch to PCI unsupported!\n"); - return -EIO; - } - - if (val & CXL_VSEC_PROTOCOL_ENABLE) { - dev_info(&dev->dev, "Card is already in CXL mode\n"); - return 0; + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; + if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) { + dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc); + return rc; } - - dev_info(&dev->dev, "Card is in PCI mode, scheduling kernel thread " - "to switch to CXL mode\n"); - - work = kmalloc(sizeof(struct cxl_switch_work), GFP_KERNEL); - if (!work) - return -ENOMEM; - - pci_dev_get(dev); - work->dev = dev; - work->vsec = vsec; - work->mode = mode; - INIT_WORK(&work->work, switch_card_to_cxl); - - schedule_work(&work->work); - /* - * We return a failure now to abort the driver init. Once the - * link has been cycled and the card is in cxl mode we will - * come back (possibly using the generic cxl driver), but - * return success as the card should then be in cxl mode. - * - * TODO: What if the card comes back in PCI mode even after - * the switch? Don't want to spin endlessly. + * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states + * we must wait 100ms after this mode switch before touching + * PCIe config space. */ - return -EBUSY; -} -EXPORT_SYMBOL_GPL(cxl_check_and_switch_mode); - -#endif /* CONFIG_CXL_BIMODAL */ - -static int setup_cxl_protocol_area(struct pci_dev *dev) -{ - u8 val; - int rc; - int vsec = find_cxl_vsec(dev); - - if (!vsec) { - dev_info(&dev->dev, "CXL VSEC not found\n"); - return -ENODEV; - } - - rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val); - if (rc) { - dev_err(&dev->dev, "Failed to read current mode control: %i\n", rc); - return rc; - } - - if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { - dev_err(&dev->dev, "Card not in CAPI mode!\n"); - return -EIO; - } - - if ((val & CXL_VSEC_PROTOCOL_MASK) != CXL_VSEC_PROTOCOL_256TB) { - val &= ~CXL_VSEC_PROTOCOL_MASK; - val |= CXL_VSEC_PROTOCOL_256TB; - rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val); - if (rc) { - dev_err(&dev->dev, "Failed to set CXL protocol area: %i\n", rc); - return rc; - } - } + msleep(100); return 0; } @@ -1724,7 +1524,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = setup_cxl_bars(dev))) return rc; - if ((rc = setup_cxl_protocol_area(dev))) + if ((rc = switch_card_to_cxl(dev))) return rc; if ((rc = cxl_update_image_control(adapter))) diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 6a3711a2e217..74da2e440763 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -39,31 +39,6 @@ bool cxl_slot_is_supported(struct pci_dev *dev, int flags); -#define CXL_BIMODE_CXL 1 -#define CXL_BIMODE_PCI 2 - -/* - * Check the mode that the given bi-modal CXL adapter is currently in and - * change it if necessary. This does not apply to AFU drivers. - * - * If the mode matches the requested mode this function will return 0 - if the - * driver was expecting the generic CXL driver to have bound to the adapter and - * it gets this return value it should fail the probe function to give the CXL - * driver a chance to probe it. - * - * If the mode does not match it will start a background task to unplug the - * device from Linux and switch its mode, and will return -EBUSY. At this - * point the calling driver should make sure it has released the device and - * fail its probe function. - * - * The offset of the CXL VSEC can be provided to this function. If 0 is passed, - * this function will search for a CXL VSEC with ID 0x1280 and return -ENODEV - * if it is not found. - */ -#ifdef CONFIG_CXL_BIMODAL -int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec); -#endif - /* Get the AFU associated with a pci_dev */ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev); -- cgit From c8d43cf08ab8c0b8829e67f7711bc72a3be6503f Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 28 Jun 2018 12:05:05 +0200 Subject: Revert "cxl: Add support for using the kernel API with a real PHB" Remove abandonned capi support for the Mellanox CX4. This reverts commit 317f5ef1b363417b6f1e93b90dfd2ffd6be6e867. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 3 --- drivers/misc/cxl/vphb.c | 16 ++-------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 9c5a21fee835..193ff22f610b 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1886,9 +1886,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); } - if (pnv_pci_on_cxl_phb(dev) && adapter->slices >= 1) - pnv_cxl_phb_set_peer_afu(dev, adapter->afu[0]); - return 0; } diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 7fd0bdc1436a..1a99c9c7a6fb 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -9,7 +9,6 @@ #include #include -#include #include "cxl.h" static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) @@ -284,18 +283,13 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu) */ } -static bool _cxl_pci_is_vphb_device(struct pci_controller *phb) -{ - return (phb->ops == &cxl_pcie_pci_ops); -} - bool cxl_pci_is_vphb_device(struct pci_dev *dev) { struct pci_controller *phb; phb = pci_bus_to_host(dev->bus); - return _cxl_pci_is_vphb_device(phb); + return (phb->ops == &cxl_pcie_pci_ops); } struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) @@ -304,13 +298,7 @@ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); - if (_cxl_pci_is_vphb_device(phb)) - return (struct cxl_afu *)phb->private_data; - - if (pnv_pci_on_cxl_phb(dev)) - return pnv_cxl_phb_to_afu(phb); - - return ERR_PTR(-ENODEV); + return (struct cxl_afu *)phb->private_data; } EXPORT_SYMBOL_GPL(cxl_pci_to_afu); -- cgit From 8bf6b91a5125ad9972281666430691d6a282d794 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 28 Jun 2018 12:05:06 +0200 Subject: Revert "powerpc/powernv: Add support for the cxl kernel api on the real phb" Remove abandonned capi support for the Mellanox CX4. This reverts commit 4361b03430d685610e5feea3ec7846e8b9ae795f. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-pci.h | 7 -- arch/powerpc/platforms/powernv/pci-cxl.c | 115 ------------------------------ arch/powerpc/platforms/powernv/pci-ioda.c | 18 +---- arch/powerpc/platforms/powernv/pci.h | 13 ---- 4 files changed, 1 insertion(+), 152 deletions(-) diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index d2d8c28db336..7f627e3f4da4 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -50,13 +50,6 @@ int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, struct pci_dev *dev, int num); void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, struct pci_dev *dev); - -/* Support for the cxl kernel api on the real PHB (instead of vPHB) */ -int pnv_cxl_enable_phb_kernel_api(struct pci_controller *hose, bool enable); -bool pnv_pci_on_cxl_phb(struct pci_dev *dev); -struct cxl_afu *pnv_cxl_phb_to_afu(struct pci_controller *hose); -void pnv_cxl_phb_set_peer_afu(struct pci_dev *dev, struct cxl_afu *afu); - #endif struct pnv_php_slot { diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index c447b7f03c09..1b18111453d7 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -8,10 +8,8 @@ */ #include -#include #include #include -#include #include "pci.h" @@ -178,116 +176,3 @@ static inline int get_cxl_module(void) #else static inline int get_cxl_module(void) { return 0; } #endif - -/* - * Sets flags and switches the controller ops to enable the cxl kernel api. - * Originally the cxl kernel API operated on a virtual PHB, but certain cards - * such as the Mellanox CX4 use a peer model instead and for these cards the - * cxl kernel api will operate on the real PHB. - */ -int pnv_cxl_enable_phb_kernel_api(struct pci_controller *hose, bool enable) -{ - struct pnv_phb *phb = hose->private_data; - int rc; - - if (!enable) { - /* - * Once cxl mode is enabled on the PHB, there is currently no - * known safe method to disable it again, and trying risks a - * checkstop. If we can find a way to safely disable cxl mode - * in the future we can revisit this, but for now the only sane - * thing to do is to refuse to disable cxl mode: - */ - return -EPERM; - } - - /* - * Hold a reference to the cxl module since several PHB operations now - * depend on it, and it would be insane to allow it to be removed so - * long as we are in this mode (and since we can't safely disable this - * mode once enabled...). - */ - rc = get_cxl_module(); - if (rc) - return rc; - - phb->flags |= PNV_PHB_FLAG_CXL; - hose->controller_ops = pnv_cxl_cx4_ioda_controller_ops; - - return 0; -} -EXPORT_SYMBOL_GPL(pnv_cxl_enable_phb_kernel_api); - -bool pnv_pci_on_cxl_phb(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - return !!(phb->flags & PNV_PHB_FLAG_CXL); -} -EXPORT_SYMBOL_GPL(pnv_pci_on_cxl_phb); - -struct cxl_afu *pnv_cxl_phb_to_afu(struct pci_controller *hose) -{ - struct pnv_phb *phb = hose->private_data; - - return (struct cxl_afu *)phb->cxl_afu; -} -EXPORT_SYMBOL_GPL(pnv_cxl_phb_to_afu); - -void pnv_cxl_phb_set_peer_afu(struct pci_dev *dev, struct cxl_afu *afu) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - phb->cxl_afu = afu; -} -EXPORT_SYMBOL_GPL(pnv_cxl_phb_set_peer_afu); - -/* - * In the peer cxl model, the XSL/PSL is physical function 0, and will be used - * by other functions on the device for memory access and interrupts. When the - * other functions are enabled we explicitly take a reference on the cxl - * function since they will use it, and allocate a default context associated - * with that function just like the vPHB model of the cxl kernel API. - */ -bool pnv_cxl_enable_device_hook(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct cxl_afu *afu = phb->cxl_afu; - - if (!pnv_pci_enable_device_hook(dev)) - return false; - - - /* No special handling for the cxl function, which is always PF 0 */ - if (PCI_FUNC(dev->devfn) == 0) - return true; - - if (!afu) { - dev_WARN(&dev->dev, "Attempted to enable function > 0 on CXL PHB without a peer AFU\n"); - return false; - } - - dev_info(&dev->dev, "Enabling function on CXL enabled PHB with peer AFU\n"); - - /* Make sure the peer AFU can't go away while this device is active */ - cxl_afu_get(afu); - - return cxl_pci_associate_default_context(dev, afu); -} - -void pnv_cxl_disable_device(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct cxl_afu *afu = phb->cxl_afu; - - /* No special handling for cxl function: */ - if (PCI_FUNC(dev->devfn) == 0) - return; - - cxl_pci_disable_device(dev); - cxl_afu_put(afu); -} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5b819c55868d..94ec58c3f5f2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3575,7 +3575,7 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, /* Prevent enabling devices for which we couldn't properly * assign a PE */ -bool pnv_pci_enable_device_hook(struct pci_dev *dev) +static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -3843,22 +3843,6 @@ static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; -#ifdef CONFIG_CXL_BASE -const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, - .dma_bus_setup = pnv_pci_dma_bus_setup, - .enable_device_hook = pnv_cxl_enable_device_hook, - .disable_device = pnv_cxl_disable_device, - .release_device = pnv_pci_release_device, - .window_alignment = pnv_pci_window_alignment, - .setup_bridge = pnv_pci_setup_bridge, - .reset_secondary_bus = pnv_pci_reset_secondary_bus, - .dma_set_mask = pnv_pci_ioda_dma_set_mask, - .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, - .shutdown = pnv_pci_ioda_shutdown, -}; -#endif - static void __init pnv_pci_init_ioda_phb(struct device_node *np, u64 hub_id, int ioda_type) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index ba41913c7e21..44dfbc37f547 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -88,7 +88,6 @@ struct pnv_ioda_pe { }; #define PNV_PHB_FLAG_EEH (1 << 0) -#define PNV_PHB_FLAG_CXL (1 << 1) /* Real PHB supporting the cxl kernel API */ struct pnv_phb { struct pci_controller *hose; @@ -194,9 +193,6 @@ struct pnv_phb { bool nmmu_flush; } npu; -#ifdef CONFIG_CXL_BASE - struct cxl_afu *cxl_afu; -#endif int p2p_target_count; }; @@ -238,7 +234,6 @@ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); -extern bool pnv_pci_enable_device_hook(struct pci_dev *dev); extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); extern int pnv_eeh_post_init(void); @@ -262,12 +257,4 @@ extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe); extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); extern int pnv_npu2_init(struct pnv_phb *phb); -/* cxl functions */ -extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev); -extern void pnv_cxl_disable_device(struct pci_dev *dev); - - -/* phb ops (cxl switches these when enabling the kernel api on the phb) */ -extern const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops; - #endif /* __POWERNV_PCI_H */ -- cgit From 322dc4af6c95cddc4f9d806197fe6b376cfae350 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Thu, 28 Jun 2018 12:05:07 +0200 Subject: Revert "cxl: Add cxl_slot_is_supported API" Remove abandonned capi support for the Mellanox CX4. This reverts commit 4e56f858bdde5cbfb70f61baddfaa56a8ed851bf. Signed-off-by: Frederic Barrat Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 37 ------------------------------------- include/misc/cxl.h | 15 --------------- 2 files changed, 52 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 193ff22f610b..0ca818396524 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1808,43 +1808,6 @@ int cxl_slot_is_switched(struct pci_dev *dev) return (depth > CXL_MAX_PCIEX_PARENT); } -bool cxl_slot_is_supported(struct pci_dev *dev, int flags) -{ - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return false; - - if ((flags & CXL_SLOT_FLAG_DMA) && (!pvr_version_is(PVR_POWER8NVL))) { - /* - * CAPP DMA mode is technically supported on regular P8, but - * will EEH if the card attempts to access memory < 4GB, which - * we cannot realistically avoid. We might be able to work - * around the issue, but until then return unsupported: - */ - return false; - } - - if (cxl_slot_is_switched(dev)) - return false; - - /* - * XXX: This gets a little tricky on regular P8 (not POWER8NVL) since - * the CAPP can be connected to PHB 0, 1 or 2 on a first come first - * served basis, which is racy to check from here. If we need to - * support this in future we might need to consider having this - * function effectively reserve it ahead of time. - * - * Currently, the only user of this API is the Mellanox CX4, which is - * only supported on P8NVL due to the above mentioned limitation of - * CAPP DMA mode and therefore does not need to worry about this. If the - * issue with CAPP DMA mode is later worked around on P8 we might need - * to revisit this. - */ - - return true; -} -EXPORT_SYMBOL_GPL(cxl_slot_is_supported); - - static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct cxl *adapter; diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 74da2e440763..ea9ff4a1a9ca 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -24,21 +24,6 @@ * generic PCI API. This API is agnostic to the actual AFU. */ -#define CXL_SLOT_FLAG_DMA 0x1 - -/* - * Checks if the given card is in a cxl capable slot. Pass CXL_SLOT_FLAG_DMA if - * the card requires CAPP DMA mode to also check if the system supports it. - * This is intended to be used by bi-modal devices to determine if they can use - * cxl mode or if they should continue running in PCI mode. - * - * Note that this only checks if the slot is cxl capable - it does not - * currently check if the CAPP is currently available for chips where it can be - * assigned to different PHBs on a first come first serve basis (i.e. P8) - */ -bool cxl_slot_is_supported(struct pci_dev *dev, int flags); - - /* Get the AFU associated with a pci_dev */ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev); -- cgit From f18a4e1d973bc69a50419eb8918f458ea89c6c3f Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Thu, 28 Jun 2018 12:05:08 +0200 Subject: Revert "cxl: Allow a default context to be associated with an external pci_dev" Remove abandonned capi support for the Mellanox CX4. This reverts commit a19bd79e31769626d288cc016e21a31b6f47bf6f. Signed-off-by: Frederic Barrat Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/Makefile | 2 +- drivers/misc/cxl/base.c | 35 ----------------------------------- drivers/misc/cxl/cxl.h | 6 ------ drivers/misc/cxl/main.c | 2 -- drivers/misc/cxl/phb.c | 44 -------------------------------------------- drivers/misc/cxl/vphb.c | 30 +++++++++++++++++++++++++++--- include/misc/cxl-base.h | 6 ------ 7 files changed, 28 insertions(+), 97 deletions(-) delete mode 100644 drivers/misc/cxl/phb.c diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 502d41fc9ea5..5eea61b9584f 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -4,7 +4,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o pci.o trace.o -cxl-y += vphb.o phb.o api.o cxllib.o +cxl-y += vphb.o api.o cxllib.o cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o cxl-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_CXL) += cxl.o diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index e1e80cb99ad9..7557835cdfcd 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -106,41 +106,6 @@ int cxl_update_properties(struct device_node *dn, } EXPORT_SYMBOL_GPL(cxl_update_properties); -/* - * API calls into the driver that may be called from the PHB code and must be - * built in. - */ -bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) -{ - bool ret; - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return false; - - ret = calls->cxl_pci_associate_default_context(dev, afu); - - cxl_calls_put(calls); - - return ret; -} -EXPORT_SYMBOL_GPL(cxl_pci_associate_default_context); - -void cxl_pci_disable_device(struct pci_dev *dev) -{ - struct cxl_calls *calls; - - calls = cxl_calls_get(); - if (!calls) - return; - - calls->cxl_pci_disable_device(dev); - - cxl_calls_put(calls); -} -EXPORT_SYMBOL_GPL(cxl_pci_disable_device); - static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index d95c2c98f2ab..aa453448201d 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -867,15 +867,9 @@ static inline bool cxl_is_power9_dd1(void) ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count); -/* Internal functions wrapped in cxl_base to allow PHB to call them */ -bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); -void _cxl_pci_disable_device(struct pci_dev *dev); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); - bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); - void (*cxl_pci_disable_device)(struct pci_dev *dev); - struct module *owner; }; int register_cxl_calls(struct cxl_calls *calls); diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index a7e83624034b..334223b802ee 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -104,8 +104,6 @@ static inline void cxl_slbia_core(struct mm_struct *mm) static struct cxl_calls cxl_calls = { .cxl_slbia = cxl_slbia_core, - .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, - .cxl_pci_disable_device = _cxl_pci_disable_device, .owner = THIS_MODULE, }; diff --git a/drivers/misc/cxl/phb.c b/drivers/misc/cxl/phb.c deleted file mode 100644 index 6ec69ada19f4..000000000000 --- a/drivers/misc/cxl/phb.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2014-2016 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include "cxl.h" - -bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) -{ - struct cxl_context *ctx; - - /* - * Allocate a context to do cxl things to. This is used for interrupts - * in the peer model using a real phb, and if we eventually do DMA ops - * in the virtual phb, we'll need a default context to attach them to. - */ - ctx = cxl_dev_context_init(dev); - if (IS_ERR(ctx)) - return false; - dev->dev.archdata.cxl_ctx = ctx; - - return (cxl_ops->afu_check_and_enable(afu) == 0); -} -/* exported via cxl_base */ - -void _cxl_pci_disable_device(struct pci_dev *dev) -{ - struct cxl_context *ctx = cxl_get_context(dev); - - if (ctx) { - if (ctx->status == STARTED) { - dev_err(&dev->dev, "Default context started\n"); - return; - } - dev->dev.archdata.cxl_ctx = NULL; - cxl_release_context(ctx); - } -} -/* exported via cxl_base */ diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 1a99c9c7a6fb..7908633d9204 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -44,6 +44,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *phb; struct cxl_afu *afu; + struct cxl_context *ctx; phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; @@ -56,7 +57,30 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) set_dma_ops(&dev->dev, &dma_nommu_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); - return _cxl_pci_associate_default_context(dev, afu); + /* + * Allocate a context to do cxl things too. If we eventually do real + * DMA ops, we'll need a default context to attach them to + */ + ctx = cxl_dev_context_init(dev); + if (IS_ERR(ctx)) + return false; + dev->dev.archdata.cxl_ctx = ctx; + + return (cxl_ops->afu_check_and_enable(afu) == 0); +} + +static void cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_context *ctx = cxl_get_context(dev); + + if (ctx) { + if (ctx->status == STARTED) { + dev_err(&dev->dev, "Default context started\n"); + return; + } + dev->dev.archdata.cxl_ctx = NULL; + cxl_release_context(ctx); + } } static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, @@ -190,8 +214,8 @@ static struct pci_controller_ops cxl_pci_controller_ops = { .probe_mode = cxl_pci_probe_mode, .enable_device_hook = cxl_pci_enable_device_hook, - .disable_device = _cxl_pci_disable_device, - .release_device = _cxl_pci_disable_device, + .disable_device = cxl_pci_disable_device, + .release_device = cxl_pci_disable_device, .window_alignment = cxl_pci_window_alignment, .reset_secondary_bus = cxl_pci_reset_secondary_bus, .setup_msi_irqs = cxl_setup_msi_irqs, diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h index bb7e629ae492..f53808fa638a 100644 --- a/include/misc/cxl-base.h +++ b/include/misc/cxl-base.h @@ -10,8 +10,6 @@ #ifndef _MISC_CXL_BASE_H #define _MISC_CXL_BASE_H -#include - #ifdef CONFIG_CXL_BASE #define CXL_IRQ_RANGES 4 @@ -41,8 +39,6 @@ static inline void cxl_ctx_put(void) struct cxl_afu *cxl_afu_get(struct cxl_afu *afu); void cxl_afu_put(struct cxl_afu *afu); void cxl_slbia(struct mm_struct *mm); -bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); -void cxl_pci_disable_device(struct pci_dev *dev); #else /* CONFIG_CXL_BASE */ @@ -50,8 +46,6 @@ static inline bool cxl_ctx_in_use(void) { return false; } static inline struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) { return NULL; } static inline void cxl_afu_put(struct cxl_afu *afu) {} static inline void cxl_slbia(struct mm_struct *mm) {} -static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; } -static inline void cxl_pci_disable_device(struct pci_dev *dev) {} #endif /* CONFIG_CXL_BASE */ -- cgit From f3988ca4c74e136e49487b51231d324d0c923495 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Thu, 28 Jun 2018 12:05:09 +0200 Subject: cxl: Remove abandonned capi support for the Mellanox CX4, final cleanup Remove a few XSL/CX4 oddities which are no longer needed. A simple revert of the initial commits was not possible (or not worth it) due to the history of the code. Signed-off-by: Frederic Barrat Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 2 +- drivers/misc/cxl/cxl.h | 12 -------- drivers/misc/cxl/debugfs.c | 5 ---- drivers/misc/cxl/pci.c | 75 ++++------------------------------------------ 4 files changed, 7 insertions(+), 87 deletions(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 0355d42d367f..5fe529b43ebe 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -95,7 +95,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) */ mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe, + i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, ctx->afu->num_procs, GFP_NOWAIT); idr_preload_end(); mutex_unlock(&afu->contexts_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index aa453448201d..44bcfafbb579 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -93,11 +93,6 @@ static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170}; -/* XSL registers (Mellanox CX4) */ -static const cxl_p1_reg_t CXL_XSL_Timebase = {0x0100}; -static const cxl_p1_reg_t CXL_XSL_TB_CTLSTAT = {0x0108}; -static const cxl_p1_reg_t CXL_XSL_FEC = {0x0158}; -static const cxl_p1_reg_t CXL_XSL_DSNCTL = {0x0168}; /* PSL registers - CAIA 2 */ static const cxl_p1_reg_t CXL_PSL9_CONTROL = {0x0020}; static const cxl_p1_reg_t CXL_XSL9_INV = {0x0110}; @@ -695,7 +690,6 @@ struct cxl { struct bin_attribute cxl_attr; int adapter_num; int user_irqs; - int min_pe; u64 ps_size; u16 psl_rev; u16 base_image; @@ -934,7 +928,6 @@ int cxl_debugfs_afu_add(struct cxl_afu *afu); void cxl_debugfs_afu_remove(struct cxl_afu *afu); void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir); void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir); -void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, struct dentry *dir); void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir); void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir); @@ -977,11 +970,6 @@ static inline void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, { } -static inline void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, - struct dentry *dir) -{ -} - static inline void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir) { } diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c index 1643850d2302..a1921d81593a 100644 --- a/drivers/misc/cxl/debugfs.c +++ b/drivers/misc/cxl/debugfs.c @@ -58,11 +58,6 @@ void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir) debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); } -void cxl_debugfs_add_adapter_regs_xsl(struct cxl *adapter, struct dentry *dir) -{ - debugfs_create_io_x64("fec", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_XSL_FEC)); -} - int cxl_debugfs_adapter_add(struct cxl *adapter) { struct dentry *dir; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 0ca818396524..6dfb4ed345d3 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -593,27 +593,7 @@ static int init_implementation_adapter_regs_psl8(struct cxl *adapter, struct pci return 0; } -static int init_implementation_adapter_regs_xsl(struct cxl *adapter, struct pci_dev *dev) -{ - u64 xsl_dsnctl; - u64 chipid; - u32 phb_index; - u64 capp_unit_id; - int rc; - - rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); - if (rc) - return rc; - - /* Tell XSL where to route data to */ - xsl_dsnctl = 0x0000600000000000ULL | (chipid << (63-5)); - xsl_dsnctl |= (capp_unit_id << (63-13)); - cxl_p1_write(adapter, CXL_XSL_DSNCTL, xsl_dsnctl); - - return 0; -} - -/* PSL & XSL */ +/* PSL */ #define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3)) #define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) /* For the PSL this is a multiple for 0 < n <= 7: */ @@ -625,21 +605,6 @@ static void write_timebase_ctrl_psl8(struct cxl *adapter) TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); } -/* XSL */ -#define TBSYNC_ENA (1ULL << 63) -/* For the XSL this is 2**n * 2000 clocks for 0 < n <= 6: */ -#define XSL_2000_CLOCKS 1 -#define XSL_4000_CLOCKS 2 -#define XSL_8000_CLOCKS 3 - -static void write_timebase_ctrl_xsl(struct cxl *adapter) -{ - cxl_p1_write(adapter, CXL_XSL_TB_CTLSTAT, - TBSYNC_ENA | - TBSYNC_CAL(3) | - TBSYNC_CNT(XSL_4000_CLOCKS)); -} - static u64 timebase_read_psl9(struct cxl *adapter) { return cxl_p1_read(adapter, CXL_PSL9_Timebase); @@ -650,11 +615,6 @@ static u64 timebase_read_psl8(struct cxl *adapter) return cxl_p1_read(adapter, CXL_PSL_Timebase); } -static u64 timebase_read_xsl(struct cxl *adapter) -{ - return cxl_p1_read(adapter, CXL_XSL_Timebase); -} - static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) { struct device_node *np; @@ -1671,37 +1631,14 @@ static const struct cxl_service_layer_ops psl8_ops = { .needs_reset_before_disable = true, }; -static const struct cxl_service_layer_ops xsl_ops = { - .adapter_regs_init = init_implementation_adapter_regs_xsl, - .invalidate_all = cxl_invalidate_all_psl8, - .sanitise_afu_regs = sanitise_afu_regs_psl8, - .handle_interrupt = cxl_irq_psl8, - .fail_irq = cxl_fail_irq_psl, - .activate_dedicated_process = cxl_activate_dedicated_process_psl8, - .attach_afu_directed = cxl_attach_afu_directed_psl8, - .attach_dedicated_process = cxl_attach_dedicated_process_psl8, - .update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl8, - .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_xsl, - .write_timebase_ctrl = write_timebase_ctrl_xsl, - .timebase_read = timebase_read_xsl, - .capi_mode = OPAL_PHB_CAPI_MODE_DMA, -}; - static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) { - if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) { - /* Mellanox CX-4 */ - dev_info(&dev->dev, "Device uses an XSL\n"); - adapter->native->sl_ops = &xsl_ops; - adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */ + if (cxl_is_power8()) { + dev_info(&dev->dev, "Device uses a PSL8\n"); + adapter->native->sl_ops = &psl8_ops; } else { - if (cxl_is_power8()) { - dev_info(&dev->dev, "Device uses a PSL8\n"); - adapter->native->sl_ops = &psl8_ops; - } else { - dev_info(&dev->dev, "Device uses a PSL9\n"); - adapter->native->sl_ops = &psl9_ops; - } + dev_info(&dev->dev, "Device uses a PSL9\n"); + adapter->native->sl_ops = &psl9_ops; } } -- cgit From 09a61e894ac852fb063ee0b54fc513b13abcab08 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 26 Jun 2018 10:20:12 -0300 Subject: selftests/powerpc: Fix strncpy usage There is a buffer overflow in dscr_inherit_test.c test. In main(), strncpy()'s third argument is the length of the source, not the size of the destination buffer, which makes strncpy() behaves like strcpy(), causing a buffer overflow if argv[0] is bigger than LEN_MAX (100). This patch maps 'prog' to the argv[0] memory region, removing the static allocation and the LEN_MAX size restriction. Signed-off-by: Breno Leitao Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c index 08a8b95e3bc1..55c55f39b6a6 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -19,7 +19,7 @@ */ #include "dscr.h" -static char prog[LEN_MAX]; +static char *prog; static void do_exec(unsigned long parent_dscr) { @@ -104,6 +104,6 @@ int main(int argc, char *argv[]) exit(1); } - strncpy(prog, argv[0], strlen(argv[0])); + prog = argv[0]; return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test"); } -- cgit From 24bf6864e8b3f9e07b461fe30e0e365b5bb8cdf8 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 26 Jun 2018 10:20:13 -0300 Subject: selftests/powerpc: Fix typos Fix two typos in the file header. Replacing the word 'priviledged' by 'privileged' and 'exuecuted' by 'executed'. Signed-off-by: Breno Leitao Signed-off-by: Gustavo Romero Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c index 55c55f39b6a6..c8c240accc0c 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -5,8 +5,8 @@ * verifies that the child is using the changed DSCR using mfspr. * * When using the privilege state SPR, the instructions such as - * mfspr or mtspr are priviledged and the kernel emulates them - * for us. Instructions using problem state SPR can be exuecuted + * mfspr or mtspr are privileged and the kernel emulates them + * for us. Instructions using problem state SPR can be executed * directly without any emulation if the HW supports them. Else * they also get emulated by the kernel. * -- cgit From d497ebf5fb3a026c0817f8c96cde578787f24093 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Mon, 18 Jun 2018 14:14:36 +0200 Subject: ocxl: Fix page fault handler in case of fault on dying process If a process exits without doing proper cleanup, there's a window where an opencapi device can try to access the memory of the dying process and may trigger a page fault. That's an expected scenario and the ocxl driver holds a reference on the mm_struct of the process until the opencapi device is notified of the process exiting. However, if mm_users is already at 0, i.e. the address space of the process has already been destroyed, the driver shouldn't try resolving the page fault, as it will fail, but it can also try accessing already freed data. It is fixed by only calling the bottom half of the page fault handler if mm_users is greater than 0 and get a reference on mm_users instead of mm_count. Otherwise, we can safely return a translation fault to the device, as its associated memory context is being removed. The opencapi device will be properly cleaned up shortly after when closing the file descriptors. Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Frederic Barrat Reviewed-By: Alastair D'Silva Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/ocxl/link.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index 88876ae8f330..a963b0a4a3c5 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -136,7 +136,7 @@ static void xsl_fault_handler_bh(struct work_struct *fault_work) int rc; /* - * We need to release a reference on the mm whenever exiting this + * We must release a reference on mm_users whenever exiting this * function (taken in the memory fault interrupt handler) */ rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr, @@ -172,7 +172,7 @@ static void xsl_fault_handler_bh(struct work_struct *fault_work) } r = RESTART; ack: - mmdrop(fault->pe_data.mm); + mmput(fault->pe_data.mm); ack_irq(spa, r); } @@ -184,6 +184,7 @@ static irqreturn_t xsl_fault_handler(int irq, void *data) struct pe_data *pe_data; struct ocxl_process_element *pe; int lpid, pid, tid; + bool schedule = false; read_irq(spa, &dsisr, &dar, &pe_handle); trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1); @@ -226,14 +227,19 @@ static irqreturn_t xsl_fault_handler(int irq, void *data) } WARN_ON(pe_data->mm->context.id != pid); - spa->xsl_fault.pe = pe_handle; - spa->xsl_fault.dar = dar; - spa->xsl_fault.dsisr = dsisr; - spa->xsl_fault.pe_data = *pe_data; - mmgrab(pe_data->mm); /* mm count is released by bottom half */ - + if (mmget_not_zero(pe_data->mm)) { + spa->xsl_fault.pe = pe_handle; + spa->xsl_fault.dar = dar; + spa->xsl_fault.dsisr = dsisr; + spa->xsl_fault.pe_data = *pe_data; + schedule = true; + /* mm_users count released by bottom half */ + } rcu_read_unlock(); - schedule_work(&spa->xsl_fault.fault_work); + if (schedule) + schedule_work(&spa->xsl_fault.fault_work); + else + ack_irq(spa, ADDRESS_ERROR); return IRQ_HANDLED; } -- cgit From 741c5640a15a23a2ec3a0846668a82e8e3b4314d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 2 Jul 2018 08:56:21 -0700 Subject: powerpc/mpc5200: Remove VLA usage In the quest to remove all stack VLA usage from the kernel[1], this switches to using a stack size large enough for the saved routine and adds a sanity check making sure the routine doesn't overflow into the 0x600 exception handler. [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Signed-off-by: Kees Cook Reviewed-by: Arnd Bergmann Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/52xx/mpc52xx_pm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c index 31d3515672f3..b1d208ded981 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pm.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c @@ -117,7 +117,10 @@ int mpc52xx_pm_enter(suspend_state_t state) u32 intr_main_mask; void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500; unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size; - char saved_0x500[mpc52xx_ds_cached_size]; + char saved_0x500[0x600-0x500]; + + if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500))) + return -ENOMEM; /* disable all interrupts in PIC */ intr_main_mask = in_be32(&intr->main_mask); -- cgit From 8950329c4a64c6d3ca0bc34711a1afbd9ce05657 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 28 Jun 2018 10:49:56 +0530 Subject: powerpc/kdump: Handle crashkernel memory reservation failure Memory reservation for crashkernel could fail if there are holes around kdump kernel offset (128M). Fail gracefully in such cases and print an error message. Signed-off-by: Hari Bathini Tested-by: David Gibson Reviewed-by: Dave Young Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/machine_kexec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 936c7e2d421e..b53401334e81 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -188,7 +188,12 @@ void __init reserve_crashkernel(void) (unsigned long)(crashk_res.start >> 20), (unsigned long)(memblock_phys_mem_size() >> 20)); - memblock_reserve(crashk_res.start, crash_size); + if (!memblock_is_region_memory(crashk_res.start, crash_size) || + memblock_reserve(crashk_res.start, crash_size)) { + pr_err("Failed to reserve memory for crashkernel!\n"); + crashk_res.start = crashk_res.end = 0; + return; + } } int overlaps_crashkernel(unsigned long start, unsigned long size) -- cgit From 00c376fdd7ed6cc083da3a3c51a63a75f270d647 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 2 Jul 2018 17:42:05 +1000 Subject: powerpc/powernv/ioda2: Add 256M IOMMU page size to the default POWER8 case The sketchy bypass uses 256M pages so add this page size as well. This should cause no behavioral change but will be used later. Fixes: 477afd6ea6 "powerpc/ioda: Use ibm,supported-tce-sizes for IOMMU page size mask" Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 94ec58c3f5f2..e7f2bee89fdd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2925,7 +2925,7 @@ static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) /* Add 16M for POWER8 by default */ if (cpu_has_feature(CPU_FTR_ARCH_207S) && !cpu_has_feature(CPU_FTR_ARCH_300)) - mask |= SZ_16M; + mask |= SZ_16M | SZ_256M; return mask; } -- cgit From 1b80ac648483189e45f60d4ddbb54b78ca942849 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 29 Jun 2018 11:52:54 -0700 Subject: powerpc/msi: Remove VLA usage In the quest to remove all stack VLA usage from the kernel[1], this switches from an unchanging variable to a constant expression to eliminate the VLA generation. [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Signed-off-by: Kees Cook Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/msi_bitmap.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 6243a7e537d0..e64a411d1a00 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -225,22 +225,23 @@ static void __init test_of_node(void) struct device_node of_node; struct property prop; struct msi_bitmap bmp; - int size = 256; - DECLARE_BITMAP(expected, size); +#define SIZE_EXPECTED 256 + DECLARE_BITMAP(expected, SIZE_EXPECTED); /* There should really be a struct device_node allocator */ memset(&of_node, 0, sizeof(of_node)); of_node_init(&of_node); of_node.full_name = node_name; - WARN_ON(msi_bitmap_alloc(&bmp, size, &of_node)); + WARN_ON(msi_bitmap_alloc(&bmp, SIZE_EXPECTED, &of_node)); /* No msi-available-ranges, so expect > 0 */ WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0); /* Should all still be free */ - WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size))); - bitmap_release_region(bmp.bitmap, 0, get_count_order(size)); + WARN_ON(bitmap_find_free_region(bmp.bitmap, SIZE_EXPECTED, + get_count_order(SIZE_EXPECTED))); + bitmap_release_region(bmp.bitmap, 0, get_count_order(SIZE_EXPECTED)); /* Now create a fake msi-available-ranges property */ @@ -256,8 +257,8 @@ static void __init test_of_node(void) WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp)); /* Check we got the expected result */ - WARN_ON(bitmap_parselist(expected_str, expected, size)); - WARN_ON(!bitmap_equal(expected, bmp.bitmap, size)); + WARN_ON(bitmap_parselist(expected_str, expected, SIZE_EXPECTED)); + WARN_ON(!bitmap_equal(expected, bmp.bitmap, SIZE_EXPECTED)); msi_bitmap_free(&bmp); kfree(bmp.bitmap); -- cgit From 26064848efbca49c643d1237dc1f8215515d52ee Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 19 Jun 2018 23:52:30 +0300 Subject: powerpc: Enable kernel XZ compression option on BOOK3S_32 Enable kernel XZ compression option on BOOK3S_32. Tested on G4 PowerBook. Signed-off-by: Aaro Koskinen [mpe: Use one select under the PPC symbol guarded by if PPC_BOOK3S] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/platforms/Kconfig.cputype | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9f2b75fe2c2d..5eb4d969afbf 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -197,6 +197,7 @@ config PPC select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_KERNEL_GZIP + select HAVE_KERNEL_XZ if PPC_BOOK3S select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e6a1de521319..e93d6186de6e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -74,7 +74,6 @@ config PPC_BOOK3S_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK - select HAVE_KERNEL_XZ config PPC_BOOK3E_64 bool "Embedded processors" -- cgit From dcb14337e0f2adb227c376e6327ef0c3e4cce6f9 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Wed, 25 Apr 2018 11:44:55 +0530 Subject: cpufreq: powernv: Remove global pstate ramp-down timer in POWER9 POWER9 does not support global pstate requests for the chip. So remove the timer logic which slowly ramps down the global pstate in P9 platforms. Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar [mpe: Drop NULL check before kfree(policy->driver_data)] Signed-off-by: Michael Ellerman --- drivers/cpufreq/powernv-cpufreq.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 54edaec1e608..bf6519cf64bc 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -758,8 +758,13 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, cur_msec = jiffies_to_msecs(get_jiffies_64()); - spin_lock(&gpstates->gpstate_lock); freq_data.pstate_id = idx_to_pstate(new_index); + if (!gpstates) { + freq_data.gpstate_id = freq_data.pstate_id; + goto no_gpstate; + } + + spin_lock(&gpstates->gpstate_lock); if (!gpstates->last_sampled_time) { gpstate_idx = new_index; @@ -809,6 +814,7 @@ gpstates_done: spin_unlock(&gpstates->gpstate_lock); +no_gpstate: /* * Use smp_call_function to send IPI and execute the * mtspr on target CPU. We could do that without IPI @@ -843,6 +849,13 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) kernfs_put(kn); } + policy->freq_table = powernv_freqs; + policy->fast_switch_possible = true; + + if (pvr_version_is(PVR_POWER9)) + return 0; + + /* Initialise Gpstate ramp-down timer only on POWER8 */ gpstates = kzalloc(sizeof(*gpstates), GFP_KERNEL); if (!gpstates) return -ENOMEM; @@ -857,8 +870,6 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) msecs_to_jiffies(GPSTATE_TIMER_INTERVAL); spin_lock_init(&gpstates->gpstate_lock); - policy->freq_table = powernv_freqs; - policy->fast_switch_possible = true; return 0; } @@ -998,7 +1009,8 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); - del_timer_sync(&gpstates->timer); + if (gpstates) + del_timer_sync(&gpstates->timer); } static unsigned int powernv_fast_switch(struct cpufreq_policy *policy, -- cgit From 835b706bab95141e2663b046f2fc596169b57fa8 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 21 Jun 2018 10:33:03 +0200 Subject: powerpc/dts: Use 'atmel' as at24 manufacturer for pdm360ng Using 'at' as the part of the compatible string is now deprecated. Use a correct string: 'atmel,'. Signed-off-by: Bartosz Golaszewski Signed-off-by: Michael Ellerman --- arch/powerpc/boot/dts/pdm360ng.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/dts/pdm360ng.dts b/arch/powerpc/boot/dts/pdm360ng.dts index 445b88114009..df1283b63d9b 100644 --- a/arch/powerpc/boot/dts/pdm360ng.dts +++ b/arch/powerpc/boot/dts/pdm360ng.dts @@ -98,7 +98,7 @@ fsl,preserve-clocking; eeprom@50 { - compatible = "at,24c01"; + compatible = "atmel,24c01"; reg = <0x50>; }; -- cgit From 6aeb43591f0dd75eea361d0517527d9e5397abe8 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 21 Jun 2018 10:33:04 +0200 Subject: powerpc/dts: Use 'atmel' as at24 manufacturer for kmcent2 Using compatible strings without the part for at24 is now deprecated. Use a correct 'atmel,' value. Signed-off-by: Bartosz Golaszewski Signed-off-by: Michael Ellerman --- arch/powerpc/boot/dts/fsl/kmcent2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts index 5922c1ea0e96..3094df05f5ea 100644 --- a/arch/powerpc/boot/dts/fsl/kmcent2.dts +++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts @@ -130,7 +130,7 @@ #size-cells = <0>; eeprom@54 { - compatible = "24c02"; + compatible = "atmel,24c02"; reg = <0x54>; pagesize = <2>; read-only; -- cgit From 1e5d75843f57d8ec4f8b573e8b780269ceecf38e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 21 Jun 2018 10:33:05 +0200 Subject: powerpc/dts: Use a correct at24 compatible fallback in ac14xx Using 'at24' as fallback is now deprecated - use the full 'atmel,' string. Signed-off-by: Bartosz Golaszewski Signed-off-by: Michael Ellerman --- arch/powerpc/boot/dts/ac14xx.dts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/boot/dts/ac14xx.dts b/arch/powerpc/boot/dts/ac14xx.dts index 83bcfd865167..0be5c4f3265d 100644 --- a/arch/powerpc/boot/dts/ac14xx.dts +++ b/arch/powerpc/boot/dts/ac14xx.dts @@ -176,12 +176,12 @@ clock-frequency = <400000>; at24@30 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x30>; }; at24@31 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x31>; }; @@ -191,42 +191,42 @@ }; at24@50 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x50>; }; at24@51 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x51>; }; at24@52 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x52>; }; at24@53 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x53>; }; at24@54 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x54>; }; at24@55 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x55>; }; at24@56 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x56>; }; at24@57 { - compatible = "at24,24c01"; + compatible = "atmel,24c01"; reg = <0x57>; }; -- cgit From e11b64b1ef336f8976e5bf194b0eede48954f419 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 11 Jul 2018 16:02:58 +1000 Subject: powerpc: Remove Power8 DD1 from cputable This was added to support an early version of Power8 that did not have working doorbells. These machines were not publicly available, and all of the internal users have long since upgraded. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 15 +++++++-------- arch/powerpc/kernel/cputable.c | 19 ------------------- 2 files changed, 7 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 9c0a3083571b..7711ebfb7d5f 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -452,7 +452,6 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) -#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -488,16 +487,16 @@ static inline void cpu_feature_keys_init(void) { } #ifdef CONFIG_CPU_LITTLE_ENDIAN #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ - CPU_FTRS_POWER8_DD1 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | \ - CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ + CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \ + CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ CPU_FTRS_POWER9_DD2_2) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ - CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ - CPU_FTRS_PA6T | CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | \ - CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ + CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \ + CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \ + CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ CPU_FTRS_POWER9_DD2_2) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif @@ -566,7 +565,7 @@ enum { #ifdef CONFIG_CPU_LITTLE_ENDIAN #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \ - CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \ + CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & \ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \ CPU_FTRS_DT_CPU_BASE) #else @@ -574,7 +573,7 @@ enum { (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ - CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \ + ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \ CPU_FTRS_DT_CPU_BASE) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c8fc9691f8c7..9169ffdbab08 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -447,25 +447,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, - { /* Power8 DD1: Does not support doorbell IPIs */ - .pvr_mask = 0xffffff00, - .pvr_value = 0x004d0100, - .cpu_name = "POWER8 (raw)", - .cpu_features = CPU_FTRS_POWER8_DD1, - .cpu_user_features = COMMON_USER_POWER8, - .cpu_user_features2 = COMMON_USER2_POWER8, - .mmu_features = MMU_FTRS_POWER8, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", - .oprofile_type = PPC_OPROFILE_INVALID, - .cpu_setup = __setup_cpu_power8, - .cpu_restore = __restore_cpu_power8, - .machine_check_early = __machine_check_early_realmode_p8, - .platform = "power8", - }, { /* Power8 */ .pvr_mask = 0xffff0000, .pvr_value = 0x004d0000, -- cgit From 54dbcfc211f15586c57d27492f938eb4df964257 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 13 Jun 2018 23:24:14 +1000 Subject: powerpc/64s: Report SLB multi-hit rather than parity error When we take an SLB multi-hit on bare metal, we see both the multi-hit and parity error bits set in DSISR. The user manuals indicates this is expected to always happen on Power8, whereas on Power9 it says a multi-hit will "usually" also cause a parity error. We decide what to do based on the various error tables in mce_power.c, and because we process them in order and only report the first, we currently always report a parity error but not the multi-hit, eg: Severe Machine check interrupt [Recovered] Initiator: CPU Error type: SLB [Parity] Effective address: c000000ffffd4300 Although this is correct, it leaves the user wondering why they got a parity error. It would be clearer instead if we reported the multi-hit because that is more likely to be simply a software bug, whereas a true parity error is possibly an indication of a bad core. We can do that simply by reordering the error tables so that multi-hit appears before parity. That doesn't affect the error recovery at all, because we flush the SLB either way. Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce_power.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 38c5b4764bfe..d6756af6ec78 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -257,12 +257,12 @@ static const struct mce_derror_table mce_p7_derror_table[] = { { 0x00000400, true, MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000100, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000040, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, @@ -290,12 +290,12 @@ static const struct mce_derror_table mce_p8_derror_table[] = { { 0x00000200, true, MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000100, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0, false, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p9_derror_table[] = { @@ -320,12 +320,12 @@ static const struct mce_derror_table mce_p9_derror_table[] = { { 0x00000200, false, MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000100, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000040, true, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -- cgit From 9c3250a127901c93b23723dc5859c71a0d068fb8 Mon Sep 17 00:00:00 2001 From: Daniel Klamt Date: Thu, 12 Jul 2018 00:03:04 +0200 Subject: powerpc/xive: Replace msleep(x) with msleep(OPAL_BUSY_DELAY_MS) Replace msleep(x) with with msleep(OPAL_BUSY_DELAY_MS) to document these sleeps are to wait for opal (firmware). Signed-off-by: Daniel Klamt Signed-off-by: Bjoern Noetel Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xive/native.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 311185b9960a..39ab5ad58297 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -109,7 +109,7 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq) rc = opal_xive_set_irq_config(hw_irq, target, prio, sw_irq); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } return rc == 0 ? 0 : -ENXIO; } @@ -163,7 +163,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } if (rc) { pr_err("Error %lld setting queue for prio %d\n", rc, prio); @@ -190,7 +190,7 @@ static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } if (rc) pr_err("Error %lld disabling queue for prio %d\n", rc, prio); @@ -253,7 +253,7 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) for (;;) { irq = opal_xive_allocate_irq(chip_id); if (irq == OPAL_BUSY) { - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); continue; } if (irq < 0) { @@ -275,7 +275,7 @@ u32 xive_native_alloc_irq(void) rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } if (rc < 0) return 0; @@ -289,7 +289,7 @@ void xive_native_free_irq(u32 irq) s64 rc = opal_xive_free_irq(irq); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } } EXPORT_SYMBOL_GPL(xive_native_free_irq); @@ -305,7 +305,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); if (rc == OPAL_BUSY) { - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); continue; } xc->hw_ipi = 0; @@ -400,7 +400,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } if (rc) { pr_err("Failed to enable pool VP on CPU %d\n", cpu); @@ -444,7 +444,7 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc) rc = opal_xive_set_vp_info(vp, 0, 0); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } } @@ -645,7 +645,7 @@ u32 xive_native_alloc_vp_block(u32 max_vcpus) rc = opal_xive_alloc_vp_block(order); switch (rc) { case OPAL_BUSY: - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); break; case OPAL_XIVE_PROVISIONING: if (!xive_native_provision_pages()) @@ -687,7 +687,7 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation) rc = opal_xive_set_vp_info(vp_id, flags, 0); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } return rc ? -EIO : 0; } @@ -701,7 +701,7 @@ int xive_native_disable_vp(u32 vp_id) rc = opal_xive_set_vp_info(vp_id, 0, 0); if (rc != OPAL_BUSY) break; - msleep(1); + msleep(OPAL_BUSY_DELAY_MS); } return rc ? -EIO : 0; } -- cgit From 2bf1071a8d50928a4ae366bb3108833166c2b70c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 5 Jul 2018 18:47:00 +1000 Subject: powerpc/64s: Remove POWER9 DD1 support POWER9 DD1 was never a product. It is no longer supported by upstream firmware, and it is not effectively supported in Linux due to lack of testing. Signed-off-by: Nicholas Piggin Reviewed-by: Michael Ellerman [mpe: Remove arch_make_huge_pte() entirely] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 20 -------- arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +- arch/powerpc/include/asm/book3s/64/radix.h | 35 ++----------- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 2 - arch/powerpc/include/asm/cputable.h | 13 ++--- arch/powerpc/include/asm/paca.h | 5 -- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/cputable.c | 19 ------- arch/powerpc/kernel/dt_cpu_ftrs.c | 4 +- arch/powerpc/kernel/exceptions-64s.S | 4 +- arch/powerpc/kernel/idle_book3s.S | 50 ------------------ arch/powerpc/kernel/process.c | 10 +--- arch/powerpc/kvm/book3s_64_mmu_radix.c | 15 +----- arch/powerpc/kvm/book3s_hv.c | 10 ---- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 16 +----- arch/powerpc/kvm/book3s_xive_template.c | 39 +++++--------- arch/powerpc/mm/hash_utils_64.c | 30 ----------- arch/powerpc/mm/hugetlbpage.c | 9 ++-- arch/powerpc/mm/mmu_context_book3s64.c | 12 +---- arch/powerpc/mm/pgtable-radix.c | 60 +--------------------- arch/powerpc/mm/tlb-radix.c | 18 ------- arch/powerpc/perf/core-book3s.c | 34 ------------ arch/powerpc/perf/isa207-common.c | 12 ++--- arch/powerpc/perf/isa207-common.h | 5 -- arch/powerpc/perf/power9-pmu.c | 54 +------------------ arch/powerpc/platforms/powernv/idle.c | 28 ---------- arch/powerpc/platforms/powernv/smp.c | 27 ++-------- arch/powerpc/sysdev/xive/common.c | 8 +-- arch/powerpc/xmon/xmon.c | 1 - drivers/misc/cxl/cxl.h | 8 --- drivers/misc/cxl/cxllib.c | 4 -- drivers/misc/cxl/pci.c | 41 ++++++--------- 32 files changed, 66 insertions(+), 533 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index c459f937d484..50888388a359 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -32,26 +32,6 @@ static inline int hstate_get_psize(struct hstate *hstate) } } -#define arch_make_huge_pte arch_make_huge_pte -static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, - struct page *page, int writable) -{ - unsigned long page_shift; - - if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) - return entry; - - page_shift = huge_page_shift(hstate_vma(vma)); - /* - * We don't support 1G hugetlb pages yet. - */ - VM_WARN_ON(page_shift == mmu_psize_defs[MMU_PAGE_1G].shift); - if (page_shift == mmu_psize_defs[MMU_PAGE_2M].shift) - return __pte(pte_val(entry) | R_PAGE_LARGE); - else - return entry; -} - #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static inline bool gigantic_page_supported(void) { diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 63cee159022b..d334e6b9a46d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -474,9 +474,8 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, { if (full && radix_enabled()) { /* - * Let's skip the DD1 style pte update here. We know that - * this is a full mm pte clear and hence can be sure there is - * no parallel set_pte. + * We know that this is a full mm pte clear and + * hence can be sure there is no parallel set_pte. */ return radix__ptep_get_and_clear_full(mm, addr, ptep, full); } diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index ef9f96742ce1..3ab3f7aef022 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -12,12 +12,6 @@ #include #endif -/* - * For P9 DD1 only, we need to track whether the pte's huge. - */ -#define R_PAGE_LARGE _RPAGE_RSV1 - - #ifndef __ASSEMBLY__ #include #include @@ -154,20 +148,7 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, { unsigned long old_pte; - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - - unsigned long new_pte; - - old_pte = __radix_pte_update(ptep, ~0ul, 0); - /* - * new value of pte - */ - new_pte = (old_pte | set) & ~clr; - radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr); - if (new_pte) - __radix_pte_update(ptep, 0, new_pte); - } else - old_pte = __radix_pte_update(ptep, clr, set); + old_pte = __radix_pte_update(ptep, clr, set); if (!huge) assert_pte_locked(mm, addr); @@ -253,8 +234,6 @@ static inline int radix__pmd_trans_huge(pmd_t pmd) static inline pmd_t radix__pmd_mkhuge(pmd_t pmd) { - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - return __pmd(pmd_val(pmd) | _PAGE_PTE | R_PAGE_LARGE); return __pmd(pmd_val(pmd) | _PAGE_PTE); } @@ -285,18 +264,14 @@ static inline unsigned long radix__get_tree_size(void) unsigned long rts_field; /* * We support 52 bits, hence: - * DD1 52-28 = 24, 0b11000 - * Others 52-31 = 21, 0b10101 + * bits 52 - 31 = 21, 0b10101 * RTS encoding details * bits 0 - 3 of rts -> bits 6 - 8 unsigned long * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - rts_field = (0x3UL << 61); - else { - rts_field = (0x5UL << 5); /* 6 - 8 bits */ - rts_field |= (0x2UL << 61); - } + rts_field = (0x5UL << 5); /* 6 - 8 bits */ + rts_field |= (0x2UL << 61); + return rts_field; } diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index ef5c3f2994c9..1154a6dc6d26 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -48,8 +48,6 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); extern void radix__flush_tlb_all(void); -extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, - unsigned long address); extern void radix__flush_tlb_lpid_page(unsigned int lpid, unsigned long addr, diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 9c0a3083571b..f980f91cad8a 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -210,7 +210,6 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_DAWR LONG_ASM_CONST(0x0000008000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0000010000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x0000020000000000) -#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x0000040000000000) #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000) #define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000) #define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000) @@ -464,8 +463,6 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR) -#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ - (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1) #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \ @@ -489,16 +486,14 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ CPU_FTRS_POWER8_DD1 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | \ - CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ - CPU_FTRS_POWER9_DD2_2) + CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ CPU_FTRS_PA6T | CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | \ - CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ - CPU_FTRS_POWER9_DD2_2) + CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif #else @@ -567,7 +562,7 @@ enum { #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \ CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \ - CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \ + CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & \ CPU_FTRS_DT_CPU_BASE) #else #define CPU_FTRS_ALWAYS \ @@ -575,7 +570,7 @@ enum { CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \ - CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \ + CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & \ CPU_FTRS_DT_CPU_BASE) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 6d34bd71139d..4e9cede5a7e7 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -187,11 +187,6 @@ struct paca_struct { u8 subcore_sibling_mask; /* Flag to request this thread not to stop */ atomic_t dont_stop; - /* - * Pointer to an array which contains pointer - * to the sibling threads' paca. - */ - struct paca_struct **thread_sibling_pacas; /* The PSSCR value that the kernel requested before going to stop */ u64 requested_psscr; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0a0544335950..89cf15566c4e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -766,7 +766,6 @@ int main(void) OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state); OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); - OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); OFFSET(PACA_DONT_STOP, paca_struct, dont_stop); #define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c8fc9691f8c7..bc75a2908a7e 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -485,25 +485,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, - { /* Power9 DD1*/ - .pvr_mask = 0xffffff00, - .pvr_value = 0x004e0100, - .cpu_name = "POWER9 (raw)", - .cpu_features = CPU_FTRS_POWER9_DD1, - .cpu_user_features = COMMON_USER_POWER9, - .cpu_user_features2 = COMMON_USER2_POWER9, - .mmu_features = MMU_FTRS_POWER9, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 6, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power9", - .oprofile_type = PPC_OPROFILE_INVALID, - .cpu_setup = __setup_cpu_power9, - .cpu_restore = __restore_cpu_power9, - .machine_check_early = __machine_check_early_realmode_p9, - .platform = "power9", - }, { /* Power9 DD2.0 */ .pvr_mask = 0xffffefff, .pvr_value = 0x004e0200, diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 4be1c0de9406..98c373a4c1cf 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -701,9 +701,7 @@ static __init void cpufeatures_cpu_quirks(void) /* * Not all quirks can be derived from the cpufeatures device tree. */ - if ((version & 0xffffff00) == 0x004e0100) - cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; - else if ((version & 0xffffefff) == 0x004e0200) + if ((version & 0xffffefff) == 0x004e0200) ; /* DD2.0 has no feature flag */ else if ((version & 0xffffefff) == 0x004e0201) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 285c6465324a..76a14702cb9c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -276,9 +276,7 @@ BEGIN_FTR_SECTION * * This interrupt can wake directly from idle. If that is the case, * the machine check is handled then the idle wakeup code is called - * to restore state. In that case, the POWER9 DD1 idle PACA workaround - * is not applied in the early machine check code, which will cause - * bugs. + * to restore state. */ mr r11,r1 /* Save r1 */ lhz r10,PACA_IN_MCE(r13) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index e734f6e45abc..d85d5515a091 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -466,43 +466,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) blr /* return 0 for wakeup cause / SRR1 value */ #endif -/* - * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1, - * HSPRG0 will be set to the HSPRG0 value of one of the - * threads in this core. Thus the value we have in r13 - * may not be this thread's paca pointer. - * - * Fortunately, the TIR remains invariant. Since this thread's - * paca pointer is recorded in all its sibling's paca, we can - * correctly recover this thread's paca pointer if we - * know the index of this thread in the core. - * - * This index can be obtained from the TIR. - * - * i.e, thread's position in the core = TIR. - * If this value is i, then this thread's paca is - * paca->thread_sibling_pacas[i]. - */ -power9_dd1_recover_paca: - mfspr r4, SPRN_TIR - /* - * Since each entry in thread_sibling_pacas is 8 bytes - * we need to left-shift by 3 bits. Thus r4 = i * 8 - */ - sldi r4, r4, 3 - /* Get &paca->thread_sibling_pacas[0] in r5 */ - ld r5, PACA_SIBLING_PACA_PTRS(r13) - /* Load paca->thread_sibling_pacas[i] into r13 */ - ldx r13, r4, r5 - SET_PACA(r13) - /* - * Indicate that we have lost NVGPR state - * which needs to be restored from the stack. - */ - li r3, 1 - stb r3,PACA_NAPSTATELOST(r13) - blr - /* * Called from machine check handler for powersave wakeups. * Low level machine check processing has already been done. Now just @@ -537,9 +500,6 @@ pnv_powersave_wakeup: ld r2, PACATOC(r13) BEGIN_FTR_SECTION -BEGIN_FTR_SECTION_NESTED(70) - bl power9_dd1_recover_paca -END_FTR_SECTION_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70) bl pnv_restore_hyp_resource_arch300 FTR_SECTION_ELSE bl pnv_restore_hyp_resource_arch207 @@ -602,22 +562,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1) LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) -BEGIN_FTR_SECTION_NESTED(71) - /* - * Assume that we are waking up from the state - * same as the Requested Level (RL) in the PSSCR - * which are Bits 60-63 - */ - ld r5,PACA_REQ_PSSCR(r13) - rldicl r5,r5,0,60 -FTR_SECTION_ELSE_NESTED(71) /* * 0-3 bits correspond to Power-Saving Level Status * which indicates the idle state we are waking up from */ mfspr r5, SPRN_PSSCR rldicl r5,r5,4,60 -ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71) li r0, 0 /* clear requested_psscr to say we're awake */ std r0, PACA_REQ_PSSCR(r13) cmpd cr4,r5,r4 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9ef4aea9fffe..27f0caee55ea 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1250,17 +1250,9 @@ struct task_struct *__switch_to(struct task_struct *prev, * mappings. If the new process has the foreign real address * mappings, we must issue a cp_abort to clear any state and * prevent snooping, corruption or a covert channel. - * - * DD1 allows paste into normal system memory so we do an - * unpaired copy, rather than cp_abort, to clear the buffer, - * since cp_abort is quite expensive. */ - if (current_thread_info()->task->thread.used_vas) { + if (current_thread_info()->task->thread.used_vas) asm volatile(PPC_CP_ABORT); - } else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - asm volatile(PPC_COPY(%0, %1) - : : "r"(dummy_copy_buffer), "r"(0)); - } } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 176f911ee983..0af1c0aea1fe 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -66,10 +66,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, bits = root & RPDS_MASK; root = root & RPDB_MASK; - /* P9 DD1 interprets RTS (radix tree size) differently */ offset = rts + 31; - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - offset -= 3; /* current implementations only support 52-bit space */ if (offset != 52) @@ -160,17 +157,7 @@ static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, unsigned long clr, unsigned long set, unsigned long addr, unsigned int shift) { - unsigned long old = 0; - - if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) && - pte_present(*ptep)) { - /* have to invalidate it first */ - old = __radix_pte_update(ptep, _PAGE_PRESENT, 0); - kvmppc_radix_tlbie_page(kvm, addr, shift); - set |= _PAGE_PRESENT; - old &= _PAGE_PRESENT; - } - return __radix_pte_update(ptep, clr, set) | old; + return __radix_pte_update(ptep, clr, set); } void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index de686b340f4a..b568582120a3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1693,14 +1693,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); break; case KVM_REG_PPC_TB_OFFSET: - /* - * POWER9 DD1 has an erratum where writing TBU40 causes - * the timebase to lose ticks. So we don't let the - * timebase offset be changed on P9 DD1. (It is - * initialized to zero.) - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - break; /* round up to multiple of 2^24 */ vcpu->arch.vcore->tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24); @@ -2026,8 +2018,6 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, /* * Set the default HFSCR for the guest from the host value. * This value is only used on POWER9. - * On POWER9 DD1, TM doesn't work, so we make sure to - * prevent the guest from using it. * On POWER9, we want to virtualize the doorbell facility, so we * turn off the HFSCR bit, which causes those instructions to trap. */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 153988d878e8..6e4554b273f1 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -916,9 +916,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR) mtspr SPRN_BESCR, r6 mtspr SPRN_PID, r7 mtspr SPRN_WORT, r8 -BEGIN_FTR_SECTION - PPC_INVALIDATE_ERAT -END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) BEGIN_FTR_SECTION /* POWER8-only registers */ ld r5, VCPU_TCSCR(r4) @@ -1912,7 +1909,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r5, VCPU_KVM(r9) lbz r0, KVM_RADIX(r5) cmpwi cr2, r0, 0 - beq cr2, 4f + beq cr2, 2f /* * Radix: do eieio; tlbsync; ptesync sequence in case we @@ -1952,11 +1949,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) bdnz 1b ptesync -2: /* Flush the ERAT on radix P9 DD1 guest exit */ -BEGIN_FTR_SECTION - PPC_INVALIDATE_ERAT -END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) -4: +2: #endif /* CONFIG_PPC_RADIX_MMU */ /* @@ -3367,11 +3360,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX, r0 - /* Flush the ERAT on radix P9 DD1 guest exit */ -BEGIN_FTR_SECTION - PPC_INVALIDATE_ERAT -END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) - BEGIN_MMU_FTR_SECTION b 4f END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 6e41ba7ec8f4..4171ede8722b 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -25,18 +25,6 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) */ eieio(); - /* - * DD1 bug workaround: If PIPR is less favored than CPPR - * ignore the interrupt or we might incorrectly lose an IPB - * bit. - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS); - u8 pipr = be64_to_cpu(qw1) & 0xff; - if (pipr >= xc->hw_cppr) - return; - } - /* Perform the acknowledge OS to register cycle. */ ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG)); @@ -89,8 +77,15 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd) /* If the XIVE supports the new "store EOI facility, use it */ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { + else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) opal_int_eoi(hw_irq); + else if (xd->flags & XIVE_IRQ_FLAG_LSI) { + /* + * For LSIs the HW EOI cycle is used rather than PQ bits, + * as they are automatically re-triggred in HW when still + * pending. + */ + __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI); } else { uint64_t eoi_val; @@ -102,20 +97,12 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd) * * This allows us to then do a re-trigger if Q was set * rather than synthetizing an interrupt in software - * - * For LSIs, using the HW EOI cycle works around a problem - * on P9 DD1 PHBs where the other ESB accesses don't work - * properly. */ - if (xd->flags & XIVE_IRQ_FLAG_LSI) - __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI); - else { - eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00); - - /* Re-trigger if needed */ - if ((eoi_val & 1) && __x_trig_page(xd)) - __x_writeq(0, __x_trig_page(xd)); - } + eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00); + + /* Re-trigger if needed */ + if ((eoi_val & 1) && __x_trig_page(xd)) + __x_writeq(0, __x_trig_page(xd)); } } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 8318716e5075..5a72e980e25a 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -808,31 +808,6 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) } #endif /* CONFIG_MEMORY_HOTPLUG */ -static void update_hid_for_hash(void) -{ - unsigned long hid0; - unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */ - - asm volatile("ptesync": : :"memory"); - /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */ - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory"); - asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); - trace_tlbie(0, 0, rb, 0, 2, 0, 0); - - /* - * now switch the HID - */ - hid0 = mfspr(SPRN_HID0); - hid0 &= ~HID0_POWER9_RADIX; - mtspr(SPRN_HID0, hid0); - asm volatile("isync": : :"memory"); - - /* Wait for it to happen */ - while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX)) - cpu_relax(); -} - static void __init hash_init_partition_table(phys_addr_t hash_table, unsigned long htab_size) { @@ -845,8 +820,6 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, htab_size = __ilog2(htab_size) - 18; mmu_partition_table_set_entry(0, hash_table | htab_size, 0); pr_info("Partition table %p\n", partition_tb); - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - update_hid_for_hash(); } static void __init htab_initialize(void) @@ -1077,9 +1050,6 @@ void hash__early_init_mmu_secondary(void) /* Initialize hash table for that CPU */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - update_hid_for_hash(); - if (!cpu_has_feature(CPU_FTR_ARCH_300)) mtspr(SPRN_SDR1, _SDR1); else diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7c5f479c5c00..ec7538a802f9 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -620,15 +620,12 @@ static int __init add_huge_page_size(unsigned long long size) * firmware we only add hugetlb support for page sizes that can be * supported by linux page table layout. * For now we have - * Radix: 2M + * Radix: 2M and 1G * Hash: 16M and 16G */ if (radix_enabled()) { - if (mmu_psize != MMU_PAGE_2M) { - if (cpu_has_feature(CPU_FTR_POWER9_DD1) || - (mmu_psize != MMU_PAGE_1G)) - return -EINVAL; - } + if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) + return -EINVAL; } else { if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) return -EINVAL; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index f3d4b4a0e561..39e9ef0eb78b 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -273,15 +273,7 @@ void arch_exit_mmap(struct mm_struct *mm) #ifdef CONFIG_PPC_RADIX_MMU void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) { - - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - isync(); - mtspr(SPRN_PID, next->context.id); - isync(); - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); - } else { - mtspr(SPRN_PID, next->context.id); - isync(); - } + mtspr(SPRN_PID, next->context.id); + isync(); } #endif diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 96f68c5aa1f5..bba168d02235 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -226,16 +226,6 @@ void radix__mark_rodata_ro(void) { unsigned long start, end; - /* - * mark_rodata_ro() will mark itself as !writable at some point. - * Due to DD1 workaround in radix__pte_update(), we'll end up with - * an invalid pte and the system will crash quite severly. - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n"); - return; - } - start = (unsigned long)_stext; end = (unsigned long)__init_begin; @@ -533,35 +523,6 @@ found: return; } -static void update_hid_for_radix(void) -{ - unsigned long hid0; - unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */ - - asm volatile("ptesync": : :"memory"); - /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */ - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory"); - /* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */ - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory"); - asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); - trace_tlbie(0, 0, rb, 0, 2, 0, 1); - trace_tlbie(0, 0, rb, 0, 2, 1, 1); - - /* - * now switch the HID - */ - hid0 = mfspr(SPRN_HID0); - hid0 |= HID0_POWER9_RADIX; - mtspr(SPRN_HID0, hid0); - asm volatile("isync": : :"memory"); - - /* Wait for it to happen */ - while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX)) - cpu_relax(); -} - static void radix_init_amor(void) { /* @@ -576,22 +537,12 @@ static void radix_init_amor(void) static void radix_init_iamr(void) { - unsigned long iamr; - - /* - * The IAMR should set to 0 on DD1. - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - iamr = 0; - else - iamr = (1ul << 62); - /* * Radix always uses key0 of the IAMR to determine if an access is * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction * fetch. */ - mtspr(SPRN_IAMR, iamr); + mtspr(SPRN_IAMR, (1ul << 62)); } void __init radix__early_init_mmu(void) @@ -644,8 +595,6 @@ void __init radix__early_init_mmu(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) { radix_init_native(); - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - update_hid_for_radix(); lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); @@ -671,10 +620,6 @@ void radix__early_init_mmu_secondary(void) * update partition table control register and UPRT */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { - - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - update_hid_for_radix(); - lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); @@ -1095,8 +1040,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, * To avoid NMMU hang while relaxing access, we need mark * the pte invalid in between. */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1) || - atomic_read(&mm->context.copros) > 0) { + if (atomic_read(&mm->context.copros) > 0) { unsigned long old_pte, new_pte; old_pte = __radix_pte_update(ptep, ~0, 0); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 67a6e86d3e7e..902767b8a9c1 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -994,24 +994,6 @@ void radix__flush_tlb_all(void) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, - unsigned long address) -{ - /* - * We track page size in pte only for DD1, So we can - * call this only on DD1. - */ - if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) { - VM_WARN_ON(1); - return; - } - - if (old_pte & R_PAGE_LARGE) - radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M); - else - radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); -} - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) { diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3f66fcf8ad99..01f92c4a9f02 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -128,10 +128,6 @@ static inline void power_pmu_bhrb_disable(struct perf_event *event) {} static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } -static bool use_ic(u64 event) -{ - return false; -} #endif /* CONFIG_PPC32 */ static bool regs_use_siar(struct pt_regs *regs) @@ -714,14 +710,6 @@ static void pmao_restore_workaround(bool ebb) mtspr(SPRN_PMC6, pmcs[5]); } -static bool use_ic(u64 event) -{ - if (cpu_has_feature(CPU_FTR_POWER9_DD1) && - (event == 0x200f2 || event == 0x300f2)) - return true; - - return false; -} #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -1046,7 +1034,6 @@ static u64 check_and_compute_delta(u64 prev, u64 val) static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (event->hw.state & PERF_HES_STOPPED) return; @@ -1056,13 +1043,6 @@ static void power_pmu_read(struct perf_event *event) if (is_ebb_event(event)) { val = read_pmc(event->hw.idx); - if (use_ic(event->attr.config)) { - val = mfspr(SPRN_IC); - if (val > cpuhw->ic_init) - val = val - cpuhw->ic_init; - else - val = val + (0 - cpuhw->ic_init); - } local64_set(&event->hw.prev_count, val); return; } @@ -1076,13 +1056,6 @@ static void power_pmu_read(struct perf_event *event) prev = local64_read(&event->hw.prev_count); barrier(); val = read_pmc(event->hw.idx); - if (use_ic(event->attr.config)) { - val = mfspr(SPRN_IC); - if (val > cpuhw->ic_init) - val = val - cpuhw->ic_init; - else - val = val + (0 - cpuhw->ic_init); - } delta = check_and_compute_delta(prev, val); if (!delta) return; @@ -1535,13 +1508,6 @@ nocheck: event->attr.branch_sample_type); } - /* - * Workaround for POWER9 DD1 to use the Instruction Counter - * register value for instruction counting - */ - if (use_ic(event->attr.config)) - cpuhw->ic_init = mfspr(SPRN_IC); - perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 2efee3f196f5..177de814286f 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -59,7 +59,7 @@ static bool is_event_valid(u64 event) { u64 valid_mask = EVENT_VALID_MASK; - if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) valid_mask = p9_EVENT_VALID_MASK; return !(event & ~valid_mask); @@ -86,8 +86,6 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) * Incase of Power9: * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), * or if group already have any marked events. - * Non-Marked events (for DD1): - * MMCRA[SDAR_MODE] will be set to 0b01 * For rest * MMCRA[SDAR_MODE] will be set from event code. * If sdar_mode from event is zero, default to 0b01. Hardware @@ -96,7 +94,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; - else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event)) + else if (p9_SDAR_MODE(event)) *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; else *mmcra |= MMCRA_SDAR_MODE_DCACHE; @@ -106,7 +104,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) static u64 thresh_cmp_val(u64 value) { - if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) return value << p9_MMCRA_THR_CMP_SHIFT; return value << MMCRA_THR_CMP_SHIFT; @@ -114,7 +112,7 @@ static u64 thresh_cmp_val(u64 value) static unsigned long combine_from_event(u64 event) { - if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) return p9_EVENT_COMBINE(event); return EVENT_COMBINE(event); @@ -122,7 +120,7 @@ static unsigned long combine_from_event(u64 event) static unsigned long combine_shift(unsigned long pmc) { - if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) + if (cpu_has_feature(CPU_FTR_ARCH_300)) return p9_MMCR1_COMBINE_SHIFT(pmc); return MMCR1_COMBINE_SHIFT(pmc); diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 6a0b586c935a..0028f4b9490d 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -158,11 +158,6 @@ CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \ CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL -/* - * Lets restrict use of PMC5 for instruction counting. - */ -#define P9_DD1_TEST_ADDER (ISA207_TEST_ADDER | CNST_PMC_VAL(5)) - /* Bits in MMCR1 for PowerISA v2.07 */ #define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1))) #define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1)) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 2ca0b33b4efb..e012b1030a5b 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -219,12 +219,6 @@ static struct attribute_group power9_pmu_events_group = { .attrs = power9_events_attr, }; -static const struct attribute_group *power9_isa207_pmu_attr_groups[] = { - &isa207_pmu_format_group, - &power9_pmu_events_group, - NULL, -}; - PMU_FORMAT_ATTR(event, "config:0-51"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); PMU_FORMAT_ATTR(mark, "config:8"); @@ -267,17 +261,6 @@ static const struct attribute_group *power9_pmu_attr_groups[] = { NULL, }; -static int power9_generic_events_dd1[] = { - [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, - [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_DISP, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL_ALT, - [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, - [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, - [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN, -}; - static int power9_generic_events[] = { [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC, @@ -439,25 +422,6 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { #undef C -static struct power_pmu power9_isa207_pmu = { - .name = "POWER9", - .n_counter = MAX_PMU_COUNTERS, - .add_fields = ISA207_ADD_FIELDS, - .test_adder = P9_DD1_TEST_ADDER, - .compute_mmcr = isa207_compute_mmcr, - .config_bhrb = power9_config_bhrb, - .bhrb_filter_map = power9_bhrb_filter_map, - .get_constraint = isa207_get_constraint, - .get_alternatives = power9_get_alternatives, - .disable_pmc = isa207_disable_pmc, - .flags = PPMU_NO_SIAR | PPMU_ARCH_207S, - .n_generic = ARRAY_SIZE(power9_generic_events_dd1), - .generic_events = power9_generic_events_dd1, - .cache_events = &power9_cache_events, - .attr_groups = power9_isa207_pmu_attr_groups, - .bhrb_nr = 32, -}; - static struct power_pmu power9_pmu = { .name = "POWER9", .n_counter = MAX_PMU_COUNTERS, @@ -500,23 +464,7 @@ static int __init init_power9_pmu(void) } } - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - /* - * Since PM_INST_CMPL may not provide right counts in all - * sampling scenarios in power9 DD1, instead use PM_INST_DISP. - */ - EVENT_VAR(PM_INST_CMPL, _g).id = PM_INST_DISP; - /* - * Power9 DD1 should use PM_BR_CMPL_ALT event code for - * "branches" to provide correct counter value. - */ - EVENT_VAR(PM_BR_CMPL, _g).id = PM_BR_CMPL_ALT; - EVENT_VAR(PM_BR_CMPL, _c).id = PM_BR_CMPL_ALT; - rc = register_power_pmu(&power9_isa207_pmu); - } else { - rc = register_power_pmu(&power9_pmu); - } - + rc = register_power_pmu(&power9_pmu); if (rc) return rc; diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 1c5d0675b43c..12f13acee1f6 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -177,11 +177,6 @@ static void pnv_alloc_idle_core_states(void) paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state; paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING; paca_ptrs[cpu]->thread_mask = 1 << j; - if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) - continue; - paca_ptrs[cpu]->thread_sibling_pacas = - kmalloc_node(paca_ptr_array_size, - GFP_KERNEL, node); } } @@ -805,29 +800,6 @@ static int __init pnv_init_idle_states(void) pnv_alloc_idle_core_states(); - /* - * For each CPU, record its PACA address in each of it's - * sibling thread's PACA at the slot corresponding to this - * CPU's index in the core. - */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - int cpu; - - pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n"); - for_each_present_cpu(cpu) { - int base_cpu = cpu_first_thread_sibling(cpu); - int idx = cpu_thread_in_core(cpu); - int i; - - for (i = 0; i < threads_per_core; i++) { - int j = base_cpu + i; - - paca_ptrs[j]->thread_sibling_pacas[idx] = - paca_ptrs[cpu]; - } - } - } - if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) ppc_md.power_save = power7_idle; diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index b80909957792..0d354e19ef92 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -283,23 +283,6 @@ static void pnv_cause_ipi(int cpu) ic_cause_ipi(cpu); } -static void pnv_p9_dd1_cause_ipi(int cpu) -{ - int this_cpu = get_cpu(); - - /* - * POWER9 DD1 has a global addressed msgsnd, but for now we restrict - * IPIs to same core, because it requires additional synchronization - * for inter-core doorbells which we do not implement. - */ - if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) - doorbell_global_ipi(cpu); - else - ic_cause_ipi(cpu); - - put_cpu(); -} - static void __init pnv_smp_probe(void) { if (xive_enabled()) @@ -311,14 +294,10 @@ static void __init pnv_smp_probe(void) ic_cause_ipi = smp_ops->cause_ipi; WARN_ON(!ic_cause_ipi); - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) - smp_ops->cause_ipi = pnv_p9_dd1_cause_ipi; - else - smp_ops->cause_ipi = doorbell_global_ipi; - } else { + if (cpu_has_feature(CPU_FTR_ARCH_300)) + smp_ops->cause_ipi = doorbell_global_ipi; + else smp_ops->cause_ipi = pnv_cause_ipi; - } } } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 3459015092fa..4758173df426 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -319,7 +319,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) * The FW told us to call it. This happens for some * interrupt sources that need additional HW whacking * beyond the ESB manipulation. For example LPC interrupts - * on P9 DD1.0 need a latch to be clared in the LPC bridge + * on P9 DD1.0 needed a latch to be clared in the LPC bridge * itself. The Firmware will take care of it. */ if (WARN_ON_ONCE(!xive_ops->eoi)) @@ -337,9 +337,9 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) * This allows us to then do a re-trigger if Q was set * rather than synthesizing an interrupt in software * - * For LSIs, using the HW EOI cycle works around a problem - * on P9 DD1 PHBs where the other ESB accesses don't work - * properly. + * For LSIs the HW EOI cycle is used rather than PQ bits, + * as they are automatically re-triggred in HW when still + * pending. */ if (xd->flags & XIVE_IRQ_FLAG_LSI) xive_esb_read(xd, XIVE_ESB_LOAD_EOI); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 47166ad2a669..21119cfe8474 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2429,7 +2429,6 @@ static void dump_one_paca(int cpu) DUMP(p, thread_idle_state, "%#-*x"); DUMP(p, thread_mask, "%#-*x"); DUMP(p, subcore_sibling_mask, "%#-*x"); - DUMP(p, thread_sibling_pacas, "%-*px"); DUMP(p, requested_psscr, "%#-*llx"); DUMP(p, stop_sprs.pid, "%#-*llx"); DUMP(p, stop_sprs.ldbar, "%#-*llx"); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 918d4fb742d1..505f973e13f3 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -865,14 +865,6 @@ static inline bool cxl_is_power9(void) return false; } -static inline bool cxl_is_power9_dd1(void) -{ - if ((pvr_version_is(PVR_POWER9)) && - cpu_has_feature(CPU_FTR_POWER9_DD1)) - return true; - return false; -} - ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count); diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index 0bc7c31cf739..5a3f91255258 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -102,10 +102,6 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl); if (rc) return rc; - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - /* workaround for DD1 - nbwind = capiind */ - cfg->dsnctl |= ((u64)0x02 << (63-47)); - } cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 429d6de1dde7..2af0d4c47b76 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -465,23 +465,21 @@ int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg) /* nMMU_ID Defaults to: b’000001001’*/ xsl_dsnctl |= ((u64)0x09 << (63-28)); - if (!(cxl_is_power9_dd1())) { - /* - * Used to identify CAPI packets which should be sorted into - * the Non-Blocking queues by the PHB. This field should match - * the PHB PBL_NBW_CMPM register - * nbwind=0x03, bits [57:58], must include capi indicator. - * Not supported on P9 DD1. - */ - xsl_dsnctl |= (nbwind << (63-55)); + /* + * Used to identify CAPI packets which should be sorted into + * the Non-Blocking queues by the PHB. This field should match + * the PHB PBL_NBW_CMPM register + * nbwind=0x03, bits [57:58], must include capi indicator. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= (nbwind << (63-55)); - /* - * Upper 16b address bits of ASB_Notify messages sent to the - * system. Need to match the PHB’s ASN Compare/Mask Register. - * Not supported on P9 DD1. - */ - xsl_dsnctl |= asnind; - } + /* + * Upper 16b address bits of ASB_Notify messages sent to the + * system. Need to match the PHB’s ASN Compare/Mask Register. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= asnind; *reg = xsl_dsnctl; return 0; @@ -539,15 +537,8 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, /* Snoop machines */ cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL); - if (cxl_is_power9_dd1()) { - /* Disabling deadlock counter CAR */ - cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL); - /* Enable NORST */ - cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL); - } else { - /* Enable NORST and DD2 features */ - cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); - } + /* Enable NORST and DD2 features */ + cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); /* * Check if PSL has data-cache. We need to flush adapter datacache -- cgit From da2bb0da730c5c427f66ce501aa4367f6921779e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:44 +1000 Subject: powerpc/powernv: Remove useless wrapper This gets rid of a useless wrapper around pnv_pci_ioda2_table_free_pages(). Reviewed-by: David Gibson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5bd0eb6681bc..d453f88c3c3e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2199,11 +2199,6 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); } -static void pnv_ioda2_table_free(struct iommu_table *tbl) -{ - pnv_pci_ioda2_table_free_pages(tbl); -} - static struct iommu_table_ops pnv_ioda2_iommu_ops = { .set = pnv_ioda2_tce_build, #ifdef CONFIG_IOMMU_API @@ -2212,7 +2207,7 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = { #endif .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, - .free = pnv_ioda2_table_free, + .free = pnv_pci_ioda2_table_free_pages, }; static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data) -- cgit From 191c22879fbcfd98a7fe9a51786ef41253b1549b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:45 +1000 Subject: powerpc/powernv: Move TCE manupulation code to its own file Right now we have allocation code in pci-ioda.c and traversing code in pci.c, let's keep them toghether. However both files are big enough already so let's move this business to a new file. While we at it, move the code which links IOMMU table groups to IOMMU tables as it is not specific to any PNV PHB model. These puts exported symbols from the new file together. This fixes several warnings from checkpatch.pl like this: "WARNING: Prefer 'unsigned int' to bare use of 'unsigned'". As this is almost cut-n-paste, there should be no behavioral change. Reviewed-by: David Gibson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 313 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 146 ------------ arch/powerpc/platforms/powernv/pci.c | 158 ------------- arch/powerpc/platforms/powernv/pci.h | 41 ++-- 5 files changed, 340 insertions(+), 320 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/pci-ioda-tce.c diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 703a350a7f4e..b540ce8eec55 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -6,7 +6,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o -obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o +obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c new file mode 100644 index 000000000000..726b8693f5ae --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * TCE helpers for IODA PCI/PCIe on PowerNV platforms + * + * Copyright 2018 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#include +#include +#include "pci.h" + +void pnv_pci_setup_iommu_table(struct iommu_table *tbl, + void *tce_mem, u64 tce_size, + u64 dma_offset, unsigned int page_shift) +{ + tbl->it_blocksize = 16; + tbl->it_base = (unsigned long)tce_mem; + tbl->it_page_shift = page_shift; + tbl->it_offset = dma_offset >> tbl->it_page_shift; + tbl->it_index = 0; + tbl->it_size = tce_size >> 3; + tbl->it_busno = 0; + tbl->it_type = TCE_PCI; +} + +static __be64 *pnv_tce(struct iommu_table *tbl, long idx) +{ + __be64 *tmp = ((__be64 *)tbl->it_base); + int level = tbl->it_indirect_levels; + const long shift = ilog2(tbl->it_level_size); + unsigned long mask = (tbl->it_level_size - 1) << (level * shift); + + while (level) { + int n = (idx & mask) >> (level * shift); + unsigned long tce = be64_to_cpu(tmp[n]); + + tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); + idx &= ~mask; + mask >>= shift; + --level; + } + + return tmp + idx; +} + +int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + unsigned long attrs) +{ + u64 proto_tce = iommu_direction_to_tce_perm(direction); + u64 rpn = __pa(uaddr) >> tbl->it_page_shift; + long i; + + if (proto_tce & TCE_PCI_WRITE) + proto_tce |= TCE_PCI_READ; + + for (i = 0; i < npages; i++) { + unsigned long newtce = proto_tce | + ((rpn + i) << tbl->it_page_shift); + unsigned long idx = index - tbl->it_offset + i; + + *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce); + } + + return 0; +} + +#ifdef CONFIG_IOMMU_API +int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + u64 proto_tce = iommu_direction_to_tce_perm(*direction); + unsigned long newtce = *hpa | proto_tce, oldtce; + unsigned long idx = index - tbl->it_offset; + + BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + + if (newtce & TCE_PCI_WRITE) + newtce |= TCE_PCI_READ; + + oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce))); + *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); + *direction = iommu_tce_direction(oldtce); + + return 0; +} +#endif + +void pnv_tce_free(struct iommu_table *tbl, long index, long npages) +{ + long i; + + for (i = 0; i < npages; i++) { + unsigned long idx = index - tbl->it_offset + i; + + *(pnv_tce(tbl, idx)) = cpu_to_be64(0); + } +} + +unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +{ + return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset))); +} + +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned int levels) +{ + const unsigned long addr_ul = (unsigned long) addr & + ~(TCE_PCI_READ | TCE_PCI_WRITE); + + if (levels) { + long i; + u64 *tmp = (u64 *) addr_ul; + + for (i = 0; i < size; ++i) { + unsigned long hpa = be64_to_cpu(tmp[i]); + + if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) + continue; + + pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, + levels - 1); + } + } + + free_pages(addr_ul, get_order(size << 3)); +} + +void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) +{ + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; + + if (!tbl->it_size) + return; + + pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, + tbl->it_indirect_levels); +} + +static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, + unsigned int levels, unsigned long limit, + unsigned long *current_offset, unsigned long *total_allocated) +{ + struct page *tce_mem = NULL; + __be64 *addr, *tmp; + unsigned int order = max_t(unsigned int, shift, PAGE_SHIFT) - + PAGE_SHIFT; + unsigned long allocated = 1UL << (order + PAGE_SHIFT); + unsigned int entries = 1UL << (shift - 3); + long i; + + tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); + if (!tce_mem) { + pr_err("Failed to allocate a TCE memory, order=%d\n", order); + return NULL; + } + addr = page_address(tce_mem); + memset(addr, 0, allocated); + *total_allocated += allocated; + + --levels; + if (!levels) { + *current_offset += allocated; + return addr; + } + + for (i = 0; i < entries; ++i) { + tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, + levels, limit, current_offset, total_allocated); + if (!tmp) + break; + + addr[i] = cpu_to_be64(__pa(tmp) | + TCE_PCI_READ | TCE_PCI_WRITE); + + if (*current_offset >= limit) + break; + } + + return addr; +} + +long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table *tbl) +{ + void *addr; + unsigned long offset = 0, level_shift, total_allocated = 0; + const unsigned int window_shift = ilog2(window_size); + unsigned int entries_shift = window_shift - page_shift; + unsigned int table_shift = max_t(unsigned int, entries_shift + 3, + PAGE_SHIFT); + const unsigned long tce_table_size = 1UL << table_shift; + + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) + return -EINVAL; + + if (!is_power_of_2(window_size)) + return -EINVAL; + + /* Adjust direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + level_shift = entries_shift + 3; + level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); + + if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + + /* Allocate TCE table */ + addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + levels, tce_table_size, &offset, &total_allocated); + + /* addr==NULL means that the first level allocation failed */ + if (!addr) + return -ENOMEM; + + /* + * First level was allocated but some lower level failed as + * we did not allocate as much as we wanted, + * release partially allocated table. + */ + if (offset < tce_table_size) { + pnv_pci_ioda2_table_do_free_pages(addr, + 1ULL << (level_shift - 3), levels - 1); + return -ENOMEM; + } + + /* Setup linux iommu table */ + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, + page_shift); + tbl->it_level_size = 1ULL << (level_shift - 3); + tbl->it_indirect_levels = levels - 1; + tbl->it_allocated_size = total_allocated; + + pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", + window_size, tce_table_size, bus_offset); + + return 0; +} + +static void pnv_iommu_table_group_link_free(struct rcu_head *head) +{ + struct iommu_table_group_link *tgl = container_of(head, + struct iommu_table_group_link, rcu); + + kfree(tgl); +} + +void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + long i; + bool found; + struct iommu_table_group_link *tgl; + + if (!tbl || !table_group) + return; + + /* Remove link to a group from table's list of attached groups */ + found = false; + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + if (tgl->table_group == table_group) { + list_del_rcu(&tgl->next); + call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); + found = true; + break; + } + } + if (WARN_ON(!found)) + return; + + /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ + found = false; + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] == tbl) { + table_group->tables[i] = NULL; + found = true; + break; + } + } + WARN_ON(!found); +} + +long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + struct iommu_table_group_link *tgl = NULL; + + if (WARN_ON(!tbl || !table_group)) + return -EINVAL; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + return -ENOMEM; + + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[num] = tbl; + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d453f88c3c3e..4abf1175626e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -51,12 +51,8 @@ #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ #define PNV_IODA1_DMA32_SEGSIZE 0x10000000 -#define POWERNV_IOMMU_DEFAULT_LEVELS 1 -#define POWERNV_IOMMU_MAX_LEVELS 5 - static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", "NPU_OCAPI" }; -static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) @@ -2457,10 +2453,6 @@ void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) pe->tce_bypass_enabled = enable; } -static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, - __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table *tbl); - static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, int num, __u32 page_shift, __u64 window_size, __u32 levels, struct iommu_table **ptbl) @@ -2768,144 +2760,6 @@ static void pnv_pci_ioda_setup_iommu_api(void) static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif -static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, - unsigned levels, unsigned long limit, - unsigned long *current_offset, unsigned long *total_allocated) -{ - struct page *tce_mem = NULL; - __be64 *addr, *tmp; - unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT; - unsigned long allocated = 1UL << (order + PAGE_SHIFT); - unsigned entries = 1UL << (shift - 3); - long i; - - tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); - if (!tce_mem) { - pr_err("Failed to allocate a TCE memory, order=%d\n", order); - return NULL; - } - addr = page_address(tce_mem); - memset(addr, 0, allocated); - *total_allocated += allocated; - - --levels; - if (!levels) { - *current_offset += allocated; - return addr; - } - - for (i = 0; i < entries; ++i) { - tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, - levels, limit, current_offset, total_allocated); - if (!tmp) - break; - - addr[i] = cpu_to_be64(__pa(tmp) | - TCE_PCI_READ | TCE_PCI_WRITE); - - if (*current_offset >= limit) - break; - } - - return addr; -} - -static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, - unsigned long size, unsigned level); - -static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, - __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table *tbl) -{ - void *addr; - unsigned long offset = 0, level_shift, total_allocated = 0; - const unsigned window_shift = ilog2(window_size); - unsigned entries_shift = window_shift - page_shift; - unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); - const unsigned long tce_table_size = 1UL << table_shift; - - if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) - return -EINVAL; - - if (!is_power_of_2(window_size)) - return -EINVAL; - - /* Adjust direct table size from window_size and levels */ - entries_shift = (entries_shift + levels - 1) / levels; - level_shift = entries_shift + 3; - level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); - - if ((level_shift - 3) * levels + page_shift >= 60) - return -EINVAL; - - /* Allocate TCE table */ - addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, - levels, tce_table_size, &offset, &total_allocated); - - /* addr==NULL means that the first level allocation failed */ - if (!addr) - return -ENOMEM; - - /* - * First level was allocated but some lower level failed as - * we did not allocate as much as we wanted, - * release partially allocated table. - */ - if (offset < tce_table_size) { - pnv_pci_ioda2_table_do_free_pages(addr, - 1ULL << (level_shift - 3), levels - 1); - return -ENOMEM; - } - - /* Setup linux iommu table */ - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, - page_shift); - tbl->it_level_size = 1ULL << (level_shift - 3); - tbl->it_indirect_levels = levels - 1; - tbl->it_allocated_size = total_allocated; - - pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", - window_size, tce_table_size, bus_offset); - - return 0; -} - -static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, - unsigned long size, unsigned level) -{ - const unsigned long addr_ul = (unsigned long) addr & - ~(TCE_PCI_READ | TCE_PCI_WRITE); - - if (level) { - long i; - u64 *tmp = (u64 *) addr_ul; - - for (i = 0; i < size; ++i) { - unsigned long hpa = be64_to_cpu(tmp[i]); - - if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) - continue; - - pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, - level - 1); - } - } - - free_pages(addr_ul, get_order(size << 3)); -} - -static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) -{ - const unsigned long size = tbl->it_indirect_levels ? - tbl->it_level_size : tbl->it_size; - - if (!tbl->it_size) - return; - - pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, - tbl->it_indirect_levels); -} - static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) { struct pci_controller *hose = phb->hose; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b265ecc0836a..13aef2323bbc 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -802,85 +802,6 @@ struct pci_ops pnv_pci_ops = { .write = pnv_pci_write_config, }; -static __be64 *pnv_tce(struct iommu_table *tbl, long idx) -{ - __be64 *tmp = ((__be64 *)tbl->it_base); - int level = tbl->it_indirect_levels; - const long shift = ilog2(tbl->it_level_size); - unsigned long mask = (tbl->it_level_size - 1) << (level * shift); - - while (level) { - int n = (idx & mask) >> (level * shift); - unsigned long tce = be64_to_cpu(tmp[n]); - - tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); - idx &= ~mask; - mask >>= shift; - --level; - } - - return tmp + idx; -} - -int pnv_tce_build(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - unsigned long attrs) -{ - u64 proto_tce = iommu_direction_to_tce_perm(direction); - u64 rpn = __pa(uaddr) >> tbl->it_page_shift; - long i; - - if (proto_tce & TCE_PCI_WRITE) - proto_tce |= TCE_PCI_READ; - - for (i = 0; i < npages; i++) { - unsigned long newtce = proto_tce | - ((rpn + i) << tbl->it_page_shift); - unsigned long idx = index - tbl->it_offset + i; - - *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce); - } - - return 0; -} - -#ifdef CONFIG_IOMMU_API -int pnv_tce_xchg(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction) -{ - u64 proto_tce = iommu_direction_to_tce_perm(*direction); - unsigned long newtce = *hpa | proto_tce, oldtce; - unsigned long idx = index - tbl->it_offset; - - BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); - - if (newtce & TCE_PCI_WRITE) - newtce |= TCE_PCI_READ; - - oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce))); - *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); - *direction = iommu_tce_direction(oldtce); - - return 0; -} -#endif - -void pnv_tce_free(struct iommu_table *tbl, long index, long npages) -{ - long i; - - for (i = 0; i < npages; i++) { - unsigned long idx = index - tbl->it_offset + i; - - *(pnv_tce(tbl, idx)) = cpu_to_be64(0); - } -} - -unsigned long pnv_tce_get(struct iommu_table *tbl, long index) -{ - return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset))); -} - struct iommu_table *pnv_pci_table_alloc(int nid) { struct iommu_table *tbl; @@ -895,85 +816,6 @@ struct iommu_table *pnv_pci_table_alloc(int nid) return tbl; } -long pnv_pci_link_table_and_group(int node, int num, - struct iommu_table *tbl, - struct iommu_table_group *table_group) -{ - struct iommu_table_group_link *tgl = NULL; - - if (WARN_ON(!tbl || !table_group)) - return -EINVAL; - - tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, - node); - if (!tgl) - return -ENOMEM; - - tgl->table_group = table_group; - list_add_rcu(&tgl->next, &tbl->it_group_list); - - table_group->tables[num] = tbl; - - return 0; -} - -static void pnv_iommu_table_group_link_free(struct rcu_head *head) -{ - struct iommu_table_group_link *tgl = container_of(head, - struct iommu_table_group_link, rcu); - - kfree(tgl); -} - -void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, - struct iommu_table_group *table_group) -{ - long i; - bool found; - struct iommu_table_group_link *tgl; - - if (!tbl || !table_group) - return; - - /* Remove link to a group from table's list of attached groups */ - found = false; - list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { - if (tgl->table_group == table_group) { - list_del_rcu(&tgl->next); - call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); - found = true; - break; - } - } - if (WARN_ON(!found)) - return; - - /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ - found = false; - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - if (table_group->tables[i] == tbl) { - table_group->tables[i] = NULL; - found = true; - break; - } - } - WARN_ON(!found); -} - -void pnv_pci_setup_iommu_table(struct iommu_table *tbl, - void *tce_mem, u64 tce_size, - u64 dma_offset, unsigned page_shift) -{ - tbl->it_blocksize = 16; - tbl->it_base = (unsigned long)tce_mem; - tbl->it_page_shift = page_shift; - tbl->it_offset = dma_offset >> tbl->it_page_shift; - tbl->it_index = 0; - tbl->it_size = tce_size >> 3; - tbl->it_busno = 0; - tbl->it_type = TCE_PCI; -} - void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index eada4b6068cb..fa90f60e89ce 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -201,13 +201,6 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; -extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - unsigned long attrs); -extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); -extern int pnv_tce_xchg(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction); -extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, unsigned char *log_buff); @@ -217,14 +210,6 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); extern struct iommu_table *pnv_pci_table_alloc(int nid); -extern long pnv_pci_link_table_and_group(int node, int num, - struct iommu_table *tbl, - struct iommu_table_group *table_group); -extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, - struct iommu_table_group *table_group); -extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, - void *tce_mem, u64 tce_size, - u64 dma_offset, unsigned page_shift); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); @@ -272,4 +257,30 @@ extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); /* phb ops (cxl switches these when enabling the kernel api on the phb) */ extern const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops; +/* pci-ioda-tce.c */ +#define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_MAX_LEVELS 5 + +extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + unsigned long attrs); +extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); +extern int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction); +extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); + +extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table *tbl); +extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); + +extern long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, + void *tce_mem, u64 tce_size, + u64 dma_offset, unsigned int page_shift); + #endif /* __POWERNV_PCI_H */ -- cgit From 00a5c58d9499bd0c290b57205f43a70f2e69d3f6 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:46 +1000 Subject: KVM: PPC: Make iommu_table::it_userspace big endian We are going to reuse multilevel TCE code for the userspace copy of the TCE table and since it is big endian, let's make the copy big endian too. Reviewed-by: David Gibson Signed-off-by: Alexey Kardashevskiy Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 2 +- arch/powerpc/kvm/book3s_64_vio.c | 11 ++++++----- arch/powerpc/kvm/book3s_64_vio_hv.c | 10 +++++----- drivers/vfio/vfio_iommu_spapr_tce.c | 19 +++++++++---------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 20febe0b7f32..803ac70ecedf 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -117,7 +117,7 @@ struct iommu_table { unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ struct list_head it_group_list;/* List of iommu_table_group_link */ - unsigned long *it_userspace; /* userspace view of the table */ + __be64 *it_userspace; /* userspace view of the table */ struct iommu_table_ops *it_ops; struct kref it_kref; }; diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index d066e37551ec..8b9aaf24b0a2 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -378,19 +378,19 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, { struct mm_iommu_table_group_mem_t *mem = NULL; const unsigned long pgsize = 1ULL << tbl->it_page_shift; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); if (!pua) /* it_userspace allocation might be delayed */ return H_TOO_HARD; - mem = mm_iommu_lookup(kvm->mm, *pua, pgsize); + mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize); if (!mem) return H_TOO_HARD; mm_iommu_mapped_dec(mem); - *pua = 0; + *pua = cpu_to_be64(0); return H_SUCCESS; } @@ -437,7 +437,8 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, enum dma_data_direction dir) { long ret; - unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + unsigned long hpa; + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); struct mm_iommu_table_group_mem_t *mem; if (!pua) @@ -464,7 +465,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (dir != DMA_NONE) kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); - *pua = ua; + *pua = cpu_to_be64(ua); return 0; } diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 925fc316a104..236f74b210a7 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -200,7 +200,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, { struct mm_iommu_table_group_mem_t *mem = NULL; const unsigned long pgsize = 1ULL << tbl->it_page_shift; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); if (!pua) /* it_userspace allocation might be delayed */ @@ -210,13 +210,13 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, if (WARN_ON_ONCE_RM(!pua)) return H_HARDWARE; - mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize); + mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize); if (!mem) return H_TOO_HARD; mm_iommu_mapped_dec(mem); - *pua = 0; + *pua = cpu_to_be64(0); return H_SUCCESS; } @@ -268,7 +268,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, { long ret; unsigned long hpa = 0; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); struct mm_iommu_table_group_mem_t *mem; if (!pua) @@ -302,7 +302,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (dir != DMA_NONE) kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); - *pua = ua; + *pua = cpu_to_be64(ua); return 0; } diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 759a5bdd40e1..8ab124a67311 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -230,7 +230,7 @@ static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl, decrement_locked_vm(mm, cb >> PAGE_SHIFT); return -ENOMEM; } - tbl->it_userspace = uas; + tbl->it_userspace = (__be64 *) uas; return 0; } @@ -482,20 +482,20 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; int ret; unsigned long hpa = 0; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); if (!pua) return; - ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl), - &hpa, &mem); + ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua), + IOMMU_PAGE_SIZE(tbl), &hpa, &mem); if (ret) - pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", - __func__, *pua, entry, ret); + pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n", + __func__, be64_to_cpu(*pua), entry, ret); if (mem) mm_iommu_mapped_dec(mem); - *pua = 0; + *pua = cpu_to_be64(0); } static int tce_iommu_clear(struct tce_container *container, @@ -607,8 +607,7 @@ static long tce_iommu_build_v2(struct tce_container *container, for (i = 0; i < pages; ++i) { struct mm_iommu_table_group_mem_t *mem = NULL; - unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, - entry + i); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i); ret = tce_iommu_prereg_ua_to_hpa(container, tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem); @@ -642,7 +641,7 @@ static long tce_iommu_build_v2(struct tce_container *container, if (dirtmp != DMA_NONE) tce_iommu_unuse_page_v2(container, tbl, entry + i); - *pua = tce; + *pua = cpu_to_be64(tce); tce += IOMMU_PAGE_SIZE(tbl); } -- cgit From 090bad39b237aad92d8e01baa033699cf0c81cbe Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:47 +1000 Subject: powerpc/powernv: Add indirect levels to it_userspace We want to support sparse memory and therefore huge chunks of DMA windows do not need to be mapped. If a DMA window big enough to require 2 or more indirect levels, and a DMA window is used to map all RAM (which is a default case for 64bit window), we can actually save some memory by not allocation TCE for regions which we are not going to map anyway. The hardware tables alreary support indirect levels but we also keep host-physical-to-userspace translation array which is allocated by vmalloc() and is a flat array which might use quite some memory. This converts it_userspace from vmalloc'ed array to a multi level table. As the format becomes platform dependend, this replaces the direct access to it_usespace with a iommu_table_ops::useraddrptr hook which returns a pointer to the userspace copy of a TCE; future extension will return NULL if the level was not allocated. This should not change non-KVM handling of TCE tables and it_userspace will not be allocated for non-KVM tables. Reviewed-by: David Gibson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 6 +-- arch/powerpc/kvm/book3s_64_vio_hv.c | 8 ---- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 65 +++++++++++++++++++++------ arch/powerpc/platforms/powernv/pci-ioda.c | 23 +++++++--- arch/powerpc/platforms/powernv/pci.h | 3 +- drivers/vfio/vfio_iommu_spapr_tce.c | 46 ------------------- 6 files changed, 73 insertions(+), 78 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 803ac70ecedf..4bdcf22509e6 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -69,6 +69,8 @@ struct iommu_table_ops { long index, unsigned long *hpa, enum dma_data_direction *direction); + + __be64 *(*useraddrptr)(struct iommu_table *tbl, long index); #endif void (*clear)(struct iommu_table *tbl, long index, long npages); @@ -123,9 +125,7 @@ struct iommu_table { }; #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ - ((tbl)->it_userspace ? \ - &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \ - NULL) + ((tbl)->it_ops->useraddrptr((tbl), (entry))) /* Pure 2^n version of get_order */ static inline __attribute_const__ diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 236f74b210a7..ee98cf6180d7 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -206,10 +206,6 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, /* it_userspace allocation might be delayed */ return H_TOO_HARD; - pua = (void *) vmalloc_to_phys(pua); - if (WARN_ON_ONCE_RM(!pua)) - return H_HARDWARE; - mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize); if (!mem) return H_TOO_HARD; @@ -282,10 +278,6 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa))) return H_HARDWARE; - pua = (void *) vmalloc_to_phys(pua); - if (WARN_ON_ONCE_RM(!pua)) - return H_HARDWARE; - if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) return H_CLOSED; diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 726b8693f5ae..88cecc1815d9 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -31,9 +31,9 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, tbl->it_type = TCE_PCI; } -static __be64 *pnv_tce(struct iommu_table *tbl, long idx) +static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx) { - __be64 *tmp = ((__be64 *)tbl->it_base); + __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; int level = tbl->it_indirect_levels; const long shift = ilog2(tbl->it_level_size); unsigned long mask = (tbl->it_level_size - 1) << (level * shift); @@ -67,7 +67,7 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, ((rpn + i) << tbl->it_page_shift); unsigned long idx = index - tbl->it_offset + i; - *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce); + *(pnv_tce(tbl, false, idx)) = cpu_to_be64(newtce); } return 0; @@ -86,12 +86,21 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, if (newtce & TCE_PCI_WRITE) newtce |= TCE_PCI_READ; - oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce))); + oldtce = be64_to_cpu(xchg(pnv_tce(tbl, false, idx), + cpu_to_be64(newtce))); *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); *direction = iommu_tce_direction(oldtce); return 0; } + +__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index) +{ + if (WARN_ON_ONCE(!tbl->it_userspace)) + return NULL; + + return pnv_tce(tbl, true, index - tbl->it_offset); +} #endif void pnv_tce_free(struct iommu_table *tbl, long index, long npages) @@ -101,13 +110,15 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages) for (i = 0; i < npages; i++) { unsigned long idx = index - tbl->it_offset + i; - *(pnv_tce(tbl, idx)) = cpu_to_be64(0); + *(pnv_tce(tbl, false, idx)) = cpu_to_be64(0); } } unsigned long pnv_tce_get(struct iommu_table *tbl, long index) { - return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset))); + __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset); + + return be64_to_cpu(*ptce); } static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, @@ -144,6 +155,10 @@ void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, tbl->it_indirect_levels); + if (tbl->it_userspace) { + pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size, + tbl->it_indirect_levels); + } } static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, @@ -191,10 +206,11 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table *tbl) + bool alloc_userspace_copy, struct iommu_table *tbl) { - void *addr; + void *addr, *uas = NULL; unsigned long offset = 0, level_shift, total_allocated = 0; + unsigned long total_allocated_uas = 0; const unsigned int window_shift = ilog2(window_size); unsigned int entries_shift = window_shift - page_shift; unsigned int table_shift = max_t(unsigned int, entries_shift + 3, @@ -228,10 +244,20 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, * we did not allocate as much as we wanted, * release partially allocated table. */ - if (offset < tce_table_size) { - pnv_pci_ioda2_table_do_free_pages(addr, - 1ULL << (level_shift - 3), levels - 1); - return -ENOMEM; + if (offset < tce_table_size) + goto free_tces_exit; + + /* Allocate userspace view of the TCE table */ + if (alloc_userspace_copy) { + offset = 0; + uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + levels, tce_table_size, &offset, + &total_allocated_uas); + if (!uas) + goto free_tces_exit; + if (offset < tce_table_size || + total_allocated_uas != total_allocated) + goto free_uas_exit; } /* Setup linux iommu table */ @@ -240,11 +266,22 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, tbl->it_level_size = 1ULL << (level_shift - 3); tbl->it_indirect_levels = levels - 1; tbl->it_allocated_size = total_allocated; + tbl->it_userspace = uas; - pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", - window_size, tce_table_size, bus_offset); + pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d\n", + window_size, tce_table_size, bus_offset, tbl->it_base, + tbl->it_userspace, levels); return 0; + +free_uas_exit: + pnv_pci_ioda2_table_do_free_pages(uas, + 1ULL << (level_shift - 3), levels - 1); +free_tces_exit: + pnv_pci_ioda2_table_do_free_pages(addr, + 1ULL << (level_shift - 3), levels - 1); + + return -ENOMEM; } static void pnv_iommu_table_group_link_free(struct rcu_head *head) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4abf1175626e..fc38f06ee41d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2036,6 +2036,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { #ifdef CONFIG_IOMMU_API .exchange = pnv_ioda1_tce_xchg, .exchange_rm = pnv_ioda1_tce_xchg_rm, + .useraddrptr = pnv_tce_useraddrptr, #endif .clear = pnv_ioda1_tce_free, .get = pnv_tce_get, @@ -2200,6 +2201,7 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = { #ifdef CONFIG_IOMMU_API .exchange = pnv_ioda2_tce_xchg, .exchange_rm = pnv_ioda2_tce_xchg_rm, + .useraddrptr = pnv_tce_useraddrptr, #endif .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, @@ -2455,7 +2457,7 @@ void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, int num, __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table **ptbl) + bool alloc_userspace_copy, struct iommu_table **ptbl) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); @@ -2472,7 +2474,7 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, ret = pnv_pci_ioda2_table_alloc_pages(nid, bus_offset, page_shift, window_size, - levels, tbl); + levels, alloc_userspace_copy, tbl); if (ret) { iommu_tce_table_put(tbl); return ret; @@ -2505,7 +2507,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, IOMMU_PAGE_SHIFT_4K, window_size, - POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); + POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); @@ -2592,7 +2594,16 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, tce_table_size, direct_table_size); } - return bytes; + return bytes + bytes; /* one for HW table, one for userspace copy */ +} + +static long pnv_pci_ioda2_create_table_userspace( + struct iommu_table_group *table_group, + int num, __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table **ptbl) +{ + return pnv_pci_ioda2_create_table(table_group, + num, page_shift, window_size, levels, true, ptbl); } static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) @@ -2621,7 +2632,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) static struct iommu_table_group_ops pnv_pci_ioda2_ops = { .get_table_size = pnv_pci_ioda2_get_table_size, - .create_table = pnv_pci_ioda2_create_table, + .create_table = pnv_pci_ioda2_create_table_userspace, .set_window = pnv_pci_ioda2_set_window, .unset_window = pnv_pci_ioda2_unset_window, .take_ownership = pnv_ioda2_take_ownership, @@ -2726,7 +2737,7 @@ static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = { .get_table_size = pnv_pci_ioda2_get_table_size, - .create_table = pnv_pci_ioda2_create_table, + .create_table = pnv_pci_ioda2_create_table_userspace, .set_window = pnv_pci_ioda2_npu_set_window, .unset_window = pnv_pci_ioda2_npu_unset_window, .take_ownership = pnv_ioda2_npu_take_ownership, diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index fa90f60e89ce..2962f6ddb2a8 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -267,11 +267,12 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); extern int pnv_tce_xchg(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction); +extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index); extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table *tbl); + bool alloc_userspace_copy, struct iommu_table *tbl); extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); extern long pnv_pci_link_table_and_group(int node, int num, diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 8ab124a67311..54ae6c2be1b7 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -211,44 +211,6 @@ static long tce_iommu_register_pages(struct tce_container *container, return 0; } -static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl, - struct mm_struct *mm) -{ - unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * - tbl->it_size, PAGE_SIZE); - unsigned long *uas; - long ret; - - BUG_ON(tbl->it_userspace); - - ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT); - if (ret) - return ret; - - uas = vzalloc(cb); - if (!uas) { - decrement_locked_vm(mm, cb >> PAGE_SHIFT); - return -ENOMEM; - } - tbl->it_userspace = (__be64 *) uas; - - return 0; -} - -static void tce_iommu_userspace_view_free(struct iommu_table *tbl, - struct mm_struct *mm) -{ - unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * - tbl->it_size, PAGE_SIZE); - - if (!tbl->it_userspace) - return; - - vfree(tbl->it_userspace); - tbl->it_userspace = NULL; - decrement_locked_vm(mm, cb >> PAGE_SHIFT); -} - static bool tce_page_is_contained(struct page *page, unsigned page_shift) { /* @@ -599,12 +561,6 @@ static long tce_iommu_build_v2(struct tce_container *container, unsigned long hpa; enum dma_data_direction dirtmp; - if (!tbl->it_userspace) { - ret = tce_iommu_userspace_view_alloc(tbl, container->mm); - if (ret) - return ret; - } - for (i = 0; i < pages; ++i) { struct mm_iommu_table_group_mem_t *mem = NULL; __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i); @@ -685,7 +641,6 @@ static void tce_iommu_free_table(struct tce_container *container, { unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; - tce_iommu_userspace_view_free(tbl, container->mm); iommu_tce_table_put(tbl); decrement_locked_vm(container->mm, pages); } @@ -1200,7 +1155,6 @@ static void tce_iommu_release_ownership(struct tce_container *container, continue; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); - tce_iommu_userspace_view_free(tbl, container->mm); if (tbl->it_map) iommu_release_ownership(tbl); -- cgit From 9bc98c8a43c4900ee63b160f805c65051e35d917 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:48 +1000 Subject: powerpc/powernv: Rework TCE level allocation This moves actual pages allocation to a separate function which is going to be reused later in on-demand TCE allocation. While we are at it, remove unnecessary level size round up as the caller does this already. Reviewed-by: David Gibson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 30 +++++++++++++++++---------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 88cecc1815d9..123c49925b46 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -31,6 +31,23 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, tbl->it_type = TCE_PCI; } +static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) +{ + struct page *tce_mem = NULL; + __be64 *addr; + + tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT); + if (!tce_mem) { + pr_err("Failed to allocate a TCE memory, level shift=%d\n", + shift); + return NULL; + } + addr = page_address(tce_mem); + memset(addr, 0, 1UL << shift); + + return addr; +} + static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx) { __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; @@ -165,21 +182,12 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, unsigned int levels, unsigned long limit, unsigned long *current_offset, unsigned long *total_allocated) { - struct page *tce_mem = NULL; __be64 *addr, *tmp; - unsigned int order = max_t(unsigned int, shift, PAGE_SHIFT) - - PAGE_SHIFT; - unsigned long allocated = 1UL << (order + PAGE_SHIFT); + unsigned long allocated = 1UL << shift; unsigned int entries = 1UL << (shift - 3); long i; - tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); - if (!tce_mem) { - pr_err("Failed to allocate a TCE memory, order=%d\n", order); - return NULL; - } - addr = page_address(tce_mem); - memset(addr, 0, allocated); + addr = pnv_alloc_tce_level(nid, shift); *total_allocated += allocated; --levels; -- cgit From a68bd1267b7286b1687905651b404e765046de25 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 Jul 2018 16:13:49 +1000 Subject: powerpc/powernv/ioda: Allocate indirect TCE levels on demand At the moment we allocate the entire TCE table, twice (hardware part and userspace translation cache). This normally works as we normally have contigous memory and the guest will map entire RAM for 64bit DMA. However if we have sparse RAM (one example is a memory device), then we will allocate TCEs which will never be used as the guest only maps actual memory for DMA. If it is a single level TCE table, there is nothing we can really do but if it a multilevel table, we can skip allocating TCEs we know we won't need. This adds ability to allocate only first level, saving memory. This changes iommu_table::free() to avoid allocating of an extra level; iommu_table::set() will do this when needed. This adds @alloc parameter to iommu_table::exchange() to tell the callback if it can allocate an extra level; the flag is set to "false" for the realmode KVM handlers of H_PUT_TCE hcalls and the callback returns H_TOO_HARD. This still requires the entire table to be counted in mm::locked_vm. To be conservative, this only does on-demand allocation when the usespace cache table is requested which is the case of VFIO. The example math for a system replicating a powernv setup with NVLink2 in a guest: 16GB RAM mapped at 0x0 128GB GPU RAM window (16GB of actual RAM) mapped at 0x244000000000 the table to cover that all with 64K pages takes: (((0x244000000000 + 0x2000000000) >> 16)*8)>>20 = 4556MB If we allocate only necessary TCE levels, we will only need: (((0x400000000 + 0x400000000) >> 16)*8)>>20 = 4MB (plus some for indirect levels). Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 7 ++- arch/powerpc/kvm/book3s_64_vio_hv.c | 4 +- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 73 +++++++++++++++++++++------ arch/powerpc/platforms/powernv/pci-ioda.c | 8 +-- arch/powerpc/platforms/powernv/pci.h | 6 ++- drivers/vfio/vfio_iommu_spapr_tce.c | 2 +- 6 files changed, 73 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 4bdcf22509e6..daa3ee5d7ad2 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -70,7 +70,7 @@ struct iommu_table_ops { unsigned long *hpa, enum dma_data_direction *direction); - __be64 *(*useraddrptr)(struct iommu_table *tbl, long index); + __be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc); #endif void (*clear)(struct iommu_table *tbl, long index, long npages); @@ -122,10 +122,13 @@ struct iommu_table { __be64 *it_userspace; /* userspace view of the table */ struct iommu_table_ops *it_ops; struct kref it_kref; + int it_nid; }; +#define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \ + ((tbl)->it_ops->useraddrptr((tbl), (entry), false)) #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ - ((tbl)->it_ops->useraddrptr((tbl), (entry))) + ((tbl)->it_ops->useraddrptr((tbl), (entry), true)) /* Pure 2^n version of get_order */ static inline __attribute_const__ diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ee98cf6180d7..d4bcd1b17b09 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -200,7 +200,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, { struct mm_iommu_table_group_mem_t *mem = NULL; const unsigned long pgsize = 1ULL << tbl->it_page_shift; - __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); if (!pua) /* it_userspace allocation might be delayed */ @@ -264,7 +264,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, { long ret; unsigned long hpa = 0; - __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); struct mm_iommu_table_group_mem_t *mem; if (!pua) diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 123c49925b46..6c5db1acbe8d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -48,7 +48,7 @@ static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) return addr; } -static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx) +static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) { __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; int level = tbl->it_indirect_levels; @@ -57,7 +57,23 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx) while (level) { int n = (idx & mask) >> (level * shift); - unsigned long tce = be64_to_cpu(tmp[n]); + unsigned long tce; + + if (tmp[n] == 0) { + __be64 *tmp2; + + if (!alloc) + return NULL; + + tmp2 = pnv_alloc_tce_level(tbl->it_nid, + ilog2(tbl->it_level_size) + 3); + if (!tmp2) + return NULL; + + tmp[n] = cpu_to_be64(__pa(tmp2) | + TCE_PCI_READ | TCE_PCI_WRITE); + } + tce = be64_to_cpu(tmp[n]); tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); idx &= ~mask; @@ -84,7 +100,7 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, ((rpn + i) << tbl->it_page_shift); unsigned long idx = index - tbl->it_offset + i; - *(pnv_tce(tbl, false, idx)) = cpu_to_be64(newtce); + *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce); } return 0; @@ -92,31 +108,46 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, #ifdef CONFIG_IOMMU_API int pnv_tce_xchg(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction) + unsigned long *hpa, enum dma_data_direction *direction, + bool alloc) { u64 proto_tce = iommu_direction_to_tce_perm(*direction); unsigned long newtce = *hpa | proto_tce, oldtce; unsigned long idx = index - tbl->it_offset; + __be64 *ptce = NULL; BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + if (*direction == DMA_NONE) { + ptce = pnv_tce(tbl, false, idx, false); + if (!ptce) { + *hpa = 0; + return 0; + } + } + + if (!ptce) { + ptce = pnv_tce(tbl, false, idx, alloc); + if (!ptce) + return alloc ? H_HARDWARE : H_TOO_HARD; + } + if (newtce & TCE_PCI_WRITE) newtce |= TCE_PCI_READ; - oldtce = be64_to_cpu(xchg(pnv_tce(tbl, false, idx), - cpu_to_be64(newtce))); + oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce))); *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); *direction = iommu_tce_direction(oldtce); return 0; } -__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index) +__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc) { if (WARN_ON_ONCE(!tbl->it_userspace)) return NULL; - return pnv_tce(tbl, true, index - tbl->it_offset); + return pnv_tce(tbl, true, index - tbl->it_offset, alloc); } #endif @@ -126,14 +157,19 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages) for (i = 0; i < npages; i++) { unsigned long idx = index - tbl->it_offset + i; + __be64 *ptce = pnv_tce(tbl, false, idx, false); - *(pnv_tce(tbl, false, idx)) = cpu_to_be64(0); + if (ptce) + *ptce = cpu_to_be64(0); } } unsigned long pnv_tce_get(struct iommu_table *tbl, long index) { - __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset); + __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false); + + if (!ptce) + return 0; return be64_to_cpu(*ptce); } @@ -224,6 +260,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, unsigned int table_shift = max_t(unsigned int, entries_shift + 3, PAGE_SHIFT); const unsigned long tce_table_size = 1UL << table_shift; + unsigned int tmplevels = levels; if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) return -EINVAL; @@ -231,6 +268,9 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, if (!is_power_of_2(window_size)) return -EINVAL; + if (alloc_userspace_copy && (window_size > (1ULL << 32))) + tmplevels = 1; + /* Adjust direct table size from window_size and levels */ entries_shift = (entries_shift + levels - 1) / levels; level_shift = entries_shift + 3; @@ -241,7 +281,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, - levels, tce_table_size, &offset, &total_allocated); + tmplevels, tce_table_size, &offset, &total_allocated); /* addr==NULL means that the first level allocation failed */ if (!addr) @@ -252,7 +292,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, * we did not allocate as much as we wanted, * release partially allocated table. */ - if (offset < tce_table_size) + if (tmplevels == levels && offset < tce_table_size) goto free_tces_exit; /* Allocate userspace view of the TCE table */ @@ -263,8 +303,8 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, &total_allocated_uas); if (!uas) goto free_tces_exit; - if (offset < tce_table_size || - total_allocated_uas != total_allocated) + if (tmplevels == levels && (offset < tce_table_size || + total_allocated_uas != total_allocated)) goto free_uas_exit; } @@ -275,10 +315,11 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, tbl->it_indirect_levels = levels - 1; tbl->it_allocated_size = total_allocated; tbl->it_userspace = uas; + tbl->it_nid = nid; - pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d\n", + pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n", window_size, tce_table_size, bus_offset, tbl->it_base, - tbl->it_userspace, levels); + tbl->it_userspace, tmplevels, levels); return 0; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fc38f06ee41d..b4475f71a0b4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2003,7 +2003,7 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { - long ret = pnv_tce_xchg(tbl, index, hpa, direction); + long ret = pnv_tce_xchg(tbl, index, hpa, direction, true); if (!ret) pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false); @@ -2014,7 +2014,7 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { - long ret = pnv_tce_xchg(tbl, index, hpa, direction); + long ret = pnv_tce_xchg(tbl, index, hpa, direction, false); if (!ret) pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true); @@ -2168,7 +2168,7 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { - long ret = pnv_tce_xchg(tbl, index, hpa, direction); + long ret = pnv_tce_xchg(tbl, index, hpa, direction, true); if (!ret) pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); @@ -2179,7 +2179,7 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index, unsigned long *hpa, enum dma_data_direction *direction) { - long ret = pnv_tce_xchg(tbl, index, hpa, direction); + long ret = pnv_tce_xchg(tbl, index, hpa, direction, false); if (!ret) pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 2962f6ddb2a8..0020937fc694 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -266,8 +266,10 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long attrs); extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); extern int pnv_tce_xchg(struct iommu_table *tbl, long index, - unsigned long *hpa, enum dma_data_direction *direction); -extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index); + unsigned long *hpa, enum dma_data_direction *direction, + bool alloc); +extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, + bool alloc); extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 54ae6c2be1b7..11a4c194d6e3 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -631,7 +631,7 @@ static long tce_iommu_create_table(struct tce_container *container, page_shift, window_size, levels, ptbl); WARN_ON(!ret && !(*ptbl)->it_ops->free); - WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size)); return ret; } -- cgit From 8c8c10b90d88bfaad41ea34df17bf6f18d00f09d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 13 Jul 2018 13:10:47 +0000 Subject: powerpc/8xx: fix handling of early NULL pointer dereference NULL pointers are pointers to user memory space. So user pagetable has to be set in order to avoid random behaviour in case of NULL pointer dereference, otherwise we may encounter random memory access hence Machine Check Exception from TLB Miss handlers. Set user pagetable as early as possible in order to properly catch early kernel NULL pointer dereference. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_8xx.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6cab07e76732..04501b65040e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -873,6 +873,10 @@ start_here: li r0,0 stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + lis r6, swapper_pg_dir@ha + tophys(r6,r6) + mtspr SPRN_M_TW, r6 + bl early_init /* We have to do this with MMU on */ /* @@ -893,9 +897,6 @@ start_here: * init's THREAD like the context switch code does, but this is * easier......until someone changes init's static structures. */ - lis r6, swapper_pg_dir@ha - tophys(r6,r6) - mtspr SPRN_M_TW, r6 lis r4,2f@h ori r4,r4,2f@l tophys(r4,r4) -- cgit From a8bf9e504a6a2bb058b4f020d4ccc54b4157eb4a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 13 Jul 2018 21:27:48 -0700 Subject: chrp/nvram.c: add MODULE_LICENSE() Add MODULE_LICENSE() to the chrp nvram.c driver to fix the build warning message: WARNING: modpost: missing MODULE_LICENSE() in arch/powerpc/platforms/chrp/nvram.o Signed-off-by: Randy Dunlap Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/chrp/nvram.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index c3ede2c365c3..791b86398e1d 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -89,3 +90,5 @@ void __init chrp_nvram_init(void) return; } + +MODULE_LICENSE("GPL v2"); -- cgit From fdf743c5c51d5b45db4dada39109549d2e49eb62 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 17 Jul 2018 14:24:30 +1000 Subject: powerpc/hugetlbpage: Rmove unhelpful HUGEPD_*_SHIFT macros The HUGEPD_*_SHIFT macros are always defined to be PGDIR_SHIFT and PUD_SHIFT, and have to have those values to work properly. They once used to have different values, but that was really only because they were used to mean different things in different contexts. 6fa50483 "powerpc/mm/hugetlb: initialize the pagetable cache correctly for hugetlb" removed that double meaning, but left the now useless constants. Signed-off-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hugetlbpage.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f425b5b37d58..e87f9ef9115b 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -117,15 +117,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, return 0; } -/* - * These macros define how to determine which level of the page table holds - * the hpdp. - */ -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) -#define HUGEPD_PGD_SHIFT PGDIR_SHIFT -#define HUGEPD_PUD_SHIFT PUD_SHIFT -#endif - /* * At this point we do the placement change only for BOOK3S 64. This would * possibly work on other subarchs. @@ -174,13 +165,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz } } #else - if (pshift >= HUGEPD_PGD_SHIFT) { + if (pshift >= PGDIR_SHIFT) { ptl = &mm->page_table_lock; hpdp = (hugepd_t *)pg; } else { pdshift = PUD_SHIFT; pu = pud_alloc(mm, pg, addr); - if (pshift >= HUGEPD_PUD_SHIFT) { + if (pshift >= PUD_SHIFT) { ptl = pud_lockptr(mm, pu); hpdp = (hugepd_t *)pu; } else { @@ -692,9 +683,9 @@ static int __init hugetlbpage_init(void) else pdshift = PMD_SHIFT; #else - if (shift < HUGEPD_PUD_SHIFT) + if (shift < PUD_SHIFT) pdshift = PMD_SHIFT; - else if (shift < HUGEPD_PGD_SHIFT) + else if (shift < PGDIR_SHIFT) pdshift = PUD_SHIFT; else pdshift = PGDIR_SHIFT; -- cgit From fca7bf946e31688edce446fdaa571ccf77e24687 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 29 Jun 2018 15:54:32 +0530 Subject: powerpc/mpic: Pass first free vector number to mpic_setup_error_int() Update the comment to account for the spurious interrupt number. The code was already accounting for it, but that was unclear because it was achieved by mpic_setup_error_int() knowing that the number it was passed was the last used vector, rather than the first free vector. So change the meaning of the argument to the first free vector and update the caller to pass 13, instead of 12, to achieve the same result. Signed-off-by: Bharat Bhushan [mpe: Rewrite change log] Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/fsl_mpic_err.c | 2 +- arch/powerpc/sysdev/mpic.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c index 488ec453038a..2a98837dc6ba 100644 --- a/arch/powerpc/sysdev/fsl_mpic_err.c +++ b/arch/powerpc/sysdev/fsl_mpic_err.c @@ -76,7 +76,7 @@ int mpic_setup_error_int(struct mpic *mpic, int intvec) mpic->flags |= MPIC_FSL_HAS_EIMR; /* allocate interrupt vectors for error interrupts */ for (i = MPIC_MAX_ERR - 1; i >= 0; i--) - mpic->err_int_vecs[i] = --intvec; + mpic->err_int_vecs[i] = intvec--; return 0; } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 353b43972bbf..934a77324f6b 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1380,12 +1380,12 @@ struct mpic * __init mpic_alloc(struct device_node *node, * global vector number space, as in case of ipis * and timer interrupts. * - * Available vector space = intvec_top - 12, where 12 + * Available vector space = intvec_top - 13, where 13 * is the number of vectors which have been consumed by - * ipis and timer interrupts. + * ipis, timer interrupts and spurious. */ if (fsl_version >= 0x401) { - ret = mpic_setup_error_int(mpic, intvec_top - 12); + ret = mpic_setup_error_int(mpic, intvec_top - 13); if (ret) return NULL; } -- cgit From d69ccc00c497a4d81fca2dca9bda9f7799971b38 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Mon, 29 Jan 2018 22:40:09 +0000 Subject: MAINTAINERS: Remove the entry for the orphaned ams driver I no longer have any hardware with the Apple motion sensor and thus relinquish maintainership of the driver. Remove the maintainers entry entirely, meaning the code will now fall under "LINUX FOR POWER MACINTOSH". Signed-off-by: Michael Hanselmann [mpe: Drop the entry entirely, munge change log] Signed-off-by: Michael Ellerman --- MAINTAINERS | 5 ----- 1 file changed, 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 07d1576fc766..50e398a463f9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -791,11 +791,6 @@ S: Supported F: drivers/net/ethernet/amd/xgbe/ F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi -AMS (Apple Motion Sensor) DRIVER -M: Michael Hanselmann -S: Supported -F: drivers/macintosh/ams/ - ANALOG DEVICES INC AD5686 DRIVER M: Stefan Popa L: linux-pm@vger.kernel.org -- cgit From 99c3ce33a00bc40cb218af770ef00c82c8044c36 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 17 Apr 2018 19:11:29 +1000 Subject: powerpc/powernv/npu: Add a debugfs setting to change ATSD threshold The threshold at which it becomes more efficient to coalesce a range of ATSDs into a single per-PID ATSD is currently not well understood due to a lack of real-world work loads. This patch adds a debugfs parameter allowing the threshold to be altered at runtime in order to aid future development and refinement of the value. Signed-off-by: Alistair Popple Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 8cdf91f5d3a4..458315319bd3 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -17,7 +17,9 @@ #include #include #include +#include +#include #include #include #include @@ -44,7 +46,8 @@ static DEFINE_SPINLOCK(npu_context_lock); * entire TLB on the GPU for the given PID rather than each specific address in * the range. */ -#define ATSD_THRESHOLD (2*1024*1024) +static uint64_t atsd_threshold = 2 * 1024 * 1024; +static struct dentry *atsd_threshold_dentry; /* * Other types of TCE cache invalidation are not functional in the @@ -683,7 +686,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct npu_context *npu_context = mn_to_npu_context(mn); unsigned long address; - if (end - start > ATSD_THRESHOLD) { + if (end - start > atsd_threshold) { /* * Just invalidate the entire PID if the address range is too * large. @@ -958,6 +961,11 @@ int pnv_npu2_init(struct pnv_phb *phb) static int npu_index; uint64_t rc = 0; + if (!atsd_threshold_dentry) { + atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", + 0600, powerpc_debugfs_root, &atsd_threshold); + } + phb->npu.nmmu_flush = of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); for_each_child_of_node(phb->hose->dn, dn) { -- cgit From ef6cb5f1a048fdf91ccee6d63d2bfa293338502d Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Wed, 4 Jul 2018 20:58:33 +0530 Subject: cxl: Fix wrong comparison in cxl_adapter_context_get() Function atomic_inc_unless_negative() returns a bool to indicate success/failure. However cxl_adapter_context_get() wrongly compares the return value against '>=0' which will always be true. The patch fixes this comparison to '==0' there by also fixing this compile time warning: drivers/misc/cxl/main.c:290 cxl_adapter_context_get() warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned Fixes: 70b565bbdb91 ("cxl: Prevent adapter reset if an active context exists") Cc: stable@vger.kernel.org # v4.9+ Reported-by: Dan Carpenter Signed-off-by: Vaibhav Jain Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 334223b802ee..f35406be465a 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -282,7 +282,7 @@ int cxl_adapter_context_get(struct cxl *adapter) int rc; rc = atomic_inc_unless_negative(&adapter->contexts_num); - return rc >= 0 ? 0 : -EBUSY; + return rc ? 0 : -EBUSY; } void cxl_adapter_context_put(struct cxl *adapter) -- cgit From 48e91846865cd61bfdb701eb737de222d7f08c74 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Wed, 18 Jul 2018 15:08:33 -0700 Subject: powerpc/ps3: Set driver coherent_dma_mask Set the coherent_dma_mask for the PS3 ehci, ohci, and snd devices. Silences WARN_ON_ONCE messages emitted by the dma_alloc_attrs() routine. Reported-by: Fredrik Noring Signed-off-by: Geoff Levand Acked-by: Greg Kroah-Hartman Acked-by: Alan Stern Signed-off-by: Michael Ellerman --- drivers/usb/host/ehci-ps3.c | 6 ++++-- drivers/usb/host/ohci-ps3.c | 6 ++++-- sound/ppc/snd_ps3.c | 5 +++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ehci-ps3.c b/drivers/usb/host/ehci-ps3.c index 8c733492d8fe..454d8c624a3f 100644 --- a/drivers/usb/host/ehci-ps3.c +++ b/drivers/usb/host/ehci-ps3.c @@ -86,7 +86,7 @@ static int ps3_ehci_probe(struct ps3_system_bus_device *dev) int result; struct usb_hcd *hcd; unsigned int virq; - static u64 dummy_mask = DMA_BIT_MASK(32); + static u64 dummy_mask; if (usb_disabled()) { result = -ENODEV; @@ -131,7 +131,9 @@ static int ps3_ehci_probe(struct ps3_system_bus_device *dev) goto fail_irq; } - dev->core.dma_mask = &dummy_mask; /* FIXME: for improper usb code */ + dummy_mask = DMA_BIT_MASK(32); + dev->core.dma_mask = &dummy_mask; + dma_set_coherent_mask(&dev->core, dummy_mask); hcd = usb_create_hcd(&ps3_ehci_hc_driver, &dev->core, dev_name(&dev->core)); diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c index 20a23d795adf..395f9d3bc849 100644 --- a/drivers/usb/host/ohci-ps3.c +++ b/drivers/usb/host/ohci-ps3.c @@ -69,7 +69,7 @@ static int ps3_ohci_probe(struct ps3_system_bus_device *dev) int result; struct usb_hcd *hcd; unsigned int virq; - static u64 dummy_mask = DMA_BIT_MASK(32); + static u64 dummy_mask; if (usb_disabled()) { result = -ENODEV; @@ -115,7 +115,9 @@ static int ps3_ohci_probe(struct ps3_system_bus_device *dev) goto fail_irq; } - dev->core.dma_mask = &dummy_mask; /* FIXME: for improper usb code */ + dummy_mask = DMA_BIT_MASK(32); + dev->core.dma_mask = &dummy_mask; + dma_set_coherent_mask(&dev->core, dummy_mask); hcd = usb_create_hcd(&ps3_ohci_hc_driver, &dev->core, dev_name(&dev->core)); diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c index 36f34f434ecb..abe031c9d592 100644 --- a/sound/ppc/snd_ps3.c +++ b/sound/ppc/snd_ps3.c @@ -930,6 +930,7 @@ static int snd_ps3_driver_probe(struct ps3_system_bus_device *dev) { int i, ret; u64 lpar_addr, lpar_size; + static u64 dummy_mask; if (WARN_ON(!firmware_has_feature(FW_FEATURE_PS3_LV1))) return -ENODEV; @@ -970,6 +971,10 @@ static int snd_ps3_driver_probe(struct ps3_system_bus_device *dev) goto clean_mmio; } + dummy_mask = DMA_BIT_MASK(32); + dev->core.dma_mask = &dummy_mask; + dma_set_coherent_mask(&dev->core, dummy_mask); + snd_ps3_audio_set_base_addr(dev->d_region->bus_addr); /* CONFIG_SND_PS3_DEFAULT_START_DELAY */ -- cgit From ec9336396a98f61290f45e8bb942458a1b2f299b Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Wed, 18 Jul 2018 13:15:44 -0300 Subject: powerpc/prom_init: Remove linux,stdout-package property This property was added in 2004 and the only use of it, which was already inside `#if 0`, was removed a month later. Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 5425dd3d6a9f..c45fb463c9e5 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2102,8 +2102,6 @@ static void __init prom_init_stdout(void) stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); if (stdout_node != PROM_ERROR) { val = cpu_to_be32(stdout_node); - prom_setprop(prom.chosen, "/chosen", "linux,stdout-package", - &val, sizeof(val)); /* If it's a display, note it */ memset(type, 0, sizeof(type)); -- cgit From 83039f22ba2f6aff935a2acbb6bf671374e8317d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 11 Jul 2018 17:10:15 +1000 Subject: selftests/powerpc: Remove Power9 copy_unaligned test This is a test of the ISA 3.0 "copy" instruction. That instruction has an L field, which if set to 1 specifies that "the instruction identifies the beginning of a move group" (pp 858). That's also referred to as "copy first" vs "copy". In ISA 3.0B the copy instruction does not have an L field, and the corresponding bit in the instruction must be set to 1. This test is generating a "copy" instruction, not a "copy first", and so on Power9 (which implements 3.0B), this results in an illegal instruction. So just drop the test entirely. We still have copy_first_unaligned to test the "copy first" behaviour. Signed-off-by: Michael Ellerman Acked-by: Michael Neuling Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/alignment/.gitignore | 1 - tools/testing/selftests/powerpc/alignment/Makefile | 2 +- .../selftests/powerpc/alignment/copy_unaligned.c | 41 ---------------------- 3 files changed, 1 insertion(+), 43 deletions(-) delete mode 100644 tools/testing/selftests/powerpc/alignment/copy_unaligned.c diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore index 9d383073b7ad..78ac678b175b 100644 --- a/tools/testing/selftests/powerpc/alignment/.gitignore +++ b/tools/testing/selftests/powerpc/alignment/.gitignore @@ -1,4 +1,3 @@ -copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 083a48a008b4..2d09d0e28b35 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned \ +TEST_GEN_PROGS := copy_first_unaligned paste_unaligned \ paste_last_unaligned alignment_handler include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/alignment/copy_unaligned.c b/tools/testing/selftests/powerpc/alignment/copy_unaligned.c deleted file mode 100644 index 3a4e26461554..000000000000 --- a/tools/testing/selftests/powerpc/alignment/copy_unaligned.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2016, Chris Smart, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Calls to copy which are not 128-byte aligned should be caught - * and sent a SIGBUS. - * - */ - -#include -#include -#include "utils.h" -#include "instructions.h" -#include "copy_paste_unaligned_common.h" - -unsigned int expected_instruction = PPC_INST_COPY; -unsigned int instruction_mask = 0xfc0007fe; - -int test_copy_unaligned(void) -{ - /* Only run this test on a P9 or later */ - SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); - - /* Register our signal handler with SIGBUS */ - setup_signal_handler(); - - /* +1 makes buf unaligned */ - copy(cacheline_buf+1); - - /* We should not get here */ - return 1; -} - -int main(int argc, char *argv[]) -{ - return test_harness(test_copy_unaligned, "test_copy_unaligned"); -} -- cgit From 525661ef8040e099d191f5f35defaed342e6859b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 11 Jul 2018 17:10:16 +1000 Subject: selftests/powerpc: Remove Power9 paste tests Paste on POWER9 only works to accelerators and not on real memory. So these tests just generate a SIGILL. So just delete them. Signed-off-by: Michael Ellerman Acked-by: Michael Neuling Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/alignment/.gitignore | 2 - tools/testing/selftests/powerpc/alignment/Makefile | 3 +- .../powerpc/alignment/paste_last_unaligned.c | 43 ---------------------- .../selftests/powerpc/alignment/paste_unaligned.c | 43 ---------------------- 4 files changed, 1 insertion(+), 90 deletions(-) delete mode 100644 tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c delete mode 100644 tools/testing/selftests/powerpc/alignment/paste_unaligned.c diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore index 78ac678b175b..9c9b2e9b11c4 100644 --- a/tools/testing/selftests/powerpc/alignment/.gitignore +++ b/tools/testing/selftests/powerpc/alignment/.gitignore @@ -1,5 +1,3 @@ copy_first_unaligned -paste_unaligned -paste_last_unaligned copy_paste_unaligned_common alignment_handler diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 2d09d0e28b35..58d0e0ed8ff5 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -1,5 +1,4 @@ -TEST_GEN_PROGS := copy_first_unaligned paste_unaligned \ - paste_last_unaligned alignment_handler +TEST_GEN_PROGS := copy_first_unaligned alignment_handler include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c b/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c deleted file mode 100644 index 6e0ad045fcc3..000000000000 --- a/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2016, Chris Smart, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Calls to paste_last which are not 128-byte aligned should be - * caught and sent a SIGBUS. - * - */ - -#include -#include -#include "utils.h" -#include "instructions.h" -#include "copy_paste_unaligned_common.h" - -unsigned int expected_instruction = PPC_INST_PASTE_LAST; -unsigned int instruction_mask = 0xfc2007ff; - -int test_paste_last_unaligned(void) -{ - /* Only run this test on a P9 or later */ - SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); - - /* Register our signal handler with SIGBUS */ - setup_signal_handler(); - - copy(cacheline_buf); - - /* +1 makes buf unaligned */ - paste_last(cacheline_buf+1); - - /* We should not get here */ - return 1; -} - -int main(int argc, char *argv[]) -{ - return test_harness(test_paste_last_unaligned, "test_paste_last_unaligned"); -} diff --git a/tools/testing/selftests/powerpc/alignment/paste_unaligned.c b/tools/testing/selftests/powerpc/alignment/paste_unaligned.c deleted file mode 100644 index 6f982b45e4bd..000000000000 --- a/tools/testing/selftests/powerpc/alignment/paste_unaligned.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2016, Chris Smart, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Calls to paste which are not 128-byte aligned should be caught - * and sent a SIGBUS. - * - */ - -#include -#include -#include "utils.h" -#include "instructions.h" -#include "copy_paste_unaligned_common.h" - -unsigned int expected_instruction = PPC_INST_PASTE; -unsigned int instruction_mask = 0xfc0007fe; - -int test_paste_unaligned(void) -{ - /* Only run this test on a P9 or later */ - SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); - - /* Register our signal handler with SIGBUS */ - setup_signal_handler(); - - copy(cacheline_buf); - - /* +1 makes buf unaligned */ - paste(cacheline_buf+1); - - /* We should not get here */ - return 1; -} - -int main(int argc, char *argv[]) -{ - return test_harness(test_paste_unaligned, "test_paste_unaligned"); -} -- cgit From dbc3f77c2fea2875e4fd64e2dee57b9a2bda38e8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 11 Jul 2018 17:10:17 +1000 Subject: selftests/powerpc: Consolidate copy/paste test logic This logic was shared between multiple tests, but now that we have removed all but one of them we can just move it into that test. Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/alignment/.gitignore | 1 - tools/testing/selftests/powerpc/alignment/Makefile | 2 +- .../powerpc/alignment/copy_first_unaligned.c | 33 +++++++++++++- .../alignment/copy_paste_unaligned_common.c | 53 ---------------------- .../alignment/copy_paste_unaligned_common.h | 26 ----------- 5 files changed, 33 insertions(+), 82 deletions(-) delete mode 100644 tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c delete mode 100644 tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore index 9c9b2e9b11c4..6d4fd014511c 100644 --- a/tools/testing/selftests/powerpc/alignment/.gitignore +++ b/tools/testing/selftests/powerpc/alignment/.gitignore @@ -1,3 +1,2 @@ copy_first_unaligned -copy_paste_unaligned_common alignment_handler diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 58d0e0ed8ff5..93baacab7693 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -2,4 +2,4 @@ TEST_GEN_PROGS := copy_first_unaligned alignment_handler include ../../lib.mk -$(TEST_GEN_PROGS): ../harness.c ../utils.c copy_paste_unaligned_common.c +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c index 47b73b3a08bd..5a9589987702 100644 --- a/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c +++ b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c @@ -11,15 +11,46 @@ * */ +#include #include #include #include "utils.h" #include "instructions.h" -#include "copy_paste_unaligned_common.h" unsigned int expected_instruction = PPC_INST_COPY_FIRST; unsigned int instruction_mask = 0xfc2007fe; +void signal_action_handler(int signal_num, siginfo_t *info, void *ptr) +{ + ucontext_t *ctx = ptr; +#ifdef __powerpc64__ + unsigned int *pc = (unsigned int *)ctx->uc_mcontext.gp_regs[PT_NIP]; +#else + unsigned int *pc = (unsigned int *)ctx->uc_mcontext.uc_regs->gregs[PT_NIP]; +#endif + + /* + * Check that the signal was on the correct instruction, using a + * mask because the compiler assigns the register at RB. + */ + if ((*pc & instruction_mask) == expected_instruction) + _exit(0); /* We hit the right instruction */ + + _exit(1); +} + +void setup_signal_handler(void) +{ + struct sigaction signal_action; + + memset(&signal_action, 0, sizeof(signal_action)); + signal_action.sa_sigaction = signal_action_handler; + signal_action.sa_flags = SA_SIGINFO; + sigaction(SIGBUS, &signal_action, NULL); +} + +char cacheline_buf[128] __cacheline_aligned; + int test_copy_first_unaligned(void) { /* Only run this test on a P9 or later */ diff --git a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c b/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c deleted file mode 100644 index d35fa5f5d2d3..000000000000 --- a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2016, Chris Smart, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Common code for copy, copy_first, paste and paste_last unaligned - * tests. - * - */ - -#include -#include -#include -#include "utils.h" -#include "instructions.h" -#include "copy_paste_unaligned_common.h" - -unsigned int expected_instruction; -unsigned int instruction_mask; - -char cacheline_buf[128] __cacheline_aligned; - -void signal_action_handler(int signal_num, siginfo_t *info, void *ptr) -{ - ucontext_t *ctx = ptr; -#if defined(__powerpc64__) - unsigned int *pc = (unsigned int *)ctx->uc_mcontext.gp_regs[PT_NIP]; -#else - unsigned int *pc = (unsigned int *)ctx->uc_mcontext.uc_regs->gregs[PT_NIP]; -#endif - - /* - * Check that the signal was on the correct instruction, using a - * mask because the compiler assigns the register at RB. - */ - if ((*pc & instruction_mask) == expected_instruction) - _exit(0); /* We hit the right instruction */ - - _exit(1); -} - -void setup_signal_handler(void) -{ - struct sigaction signal_action; - - memset(&signal_action, 0, sizeof(signal_action)); - signal_action.sa_sigaction = signal_action_handler; - signal_action.sa_flags = SA_SIGINFO; - sigaction(SIGBUS, &signal_action, NULL); -} diff --git a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h b/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h deleted file mode 100644 index 053899fe506e..000000000000 --- a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2016, Chris Smart, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Declarations for common code for copy, copy_first, paste and - * paste_last unaligned tests. - * - */ - -#ifndef _SELFTESTS_POWERPC_COPY_PASTE_H -#define _SELFTESTS_POWERPC_COPY_PASTE_H - -#include - -int main(int argc, char *argv[]); -void signal_action_handler(int signal_num, siginfo_t *info, void *ptr); -void setup_signal_handler(void); -extern char cacheline_buf[128] __cacheline_aligned; -extern unsigned int expected_instruction; -extern unsigned int instruction_mask; - -#endif /* _SELFTESTS_POWERPC_COPY_PASTE_H */ -- cgit From a9bf5c8a271b9a954709b7ada1bd258f5cadf7ff Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:53 +1000 Subject: tty: hvc: use mutex instead of spinlock for hvc_structs lock This allows hvc operations to sleep under the lock. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/tty/hvc/hvc_console.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 7709fcc707f4..fddb63322c67 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -73,7 +73,7 @@ static LIST_HEAD(hvc_structs); * Protect the list of hvc_struct instances from inserts and removals during * list traversal. */ -static DEFINE_SPINLOCK(hvc_structs_lock); +static DEFINE_MUTEX(hvc_structs_mutex); /* * This value is used to assign a tty->index value to a hvc_struct based @@ -83,7 +83,7 @@ static DEFINE_SPINLOCK(hvc_structs_lock); static int last_hvc = -1; /* - * Do not call this function with either the hvc_structs_lock or the hvc_struct + * Do not call this function with either the hvc_structs_mutex or the hvc_struct * lock held. If successful, this function increments the kref reference * count against the target hvc_struct so it should be released when finished. */ @@ -92,25 +92,24 @@ static struct hvc_struct *hvc_get_by_index(int index) struct hvc_struct *hp; unsigned long flags; - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); list_for_each_entry(hp, &hvc_structs, next) { spin_lock_irqsave(&hp->lock, flags); if (hp->index == index) { tty_port_get(&hp->port); spin_unlock_irqrestore(&hp->lock, flags); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); return hp; } spin_unlock_irqrestore(&hp->lock, flags); } hp = NULL; + mutex_unlock(&hvc_structs_mutex); - spin_unlock(&hvc_structs_lock); return hp; } - /* * Initial console vtermnos for console API usage prior to full console * initialization. Any vty adapter outside this range will not have usable @@ -224,13 +223,13 @@ static void hvc_port_destruct(struct tty_port *port) struct hvc_struct *hp = container_of(port, struct hvc_struct, port); unsigned long flags; - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); spin_lock_irqsave(&hp->lock, flags); list_del(&(hp->next)); spin_unlock_irqrestore(&hp->lock, flags); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); kfree(hp); } @@ -733,11 +732,11 @@ static int khvcd(void *unused) try_to_freeze(); wmb(); if (!cpus_are_in_xmon()) { - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); list_for_each_entry(hp, &hvc_structs, next) { poll_mask |= hvc_poll(hp); } - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); } else poll_mask |= HVC_POLL_READ; if (hvc_kicked) @@ -871,7 +870,7 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, INIT_WORK(&hp->tty_resize, hvc_set_winsz); spin_lock_init(&hp->lock); - spin_lock(&hvc_structs_lock); + mutex_lock(&hvc_structs_mutex); /* * find index to use: @@ -891,7 +890,7 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, vtermnos[i] = vtermno; list_add_tail(&(hp->next), &hvc_structs); - spin_unlock(&hvc_structs_lock); + mutex_unlock(&hvc_structs_mutex); /* check if we need to re-register the kernel console */ hvc_check_console(i); -- cgit From ec97eaad1383ab2500fcf9a07ade6044fbcc67f5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:54 +1000 Subject: tty: hvc: hvc_poll() break hv read loop Avoid looping with the spinlock held while there is read data being returned from the hv driver. Instead note if the entire size returned by tty_buffer_request_room was read, and request another read poll. This limits the critical section lengths, and provides more even service to other consoles in case there is a pathological condition. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/tty/hvc/hvc_console.c | 88 ++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index fddb63322c67..745ac220fce8 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -592,7 +592,7 @@ static u32 timeout = MIN_TIMEOUT; int hvc_poll(struct hvc_struct *hp) { struct tty_struct *tty; - int i, n, poll_mask = 0; + int i, n, count, poll_mask = 0; char buf[N_INBUF] __ALIGNED__; unsigned long flags; int read_total = 0; @@ -618,7 +618,7 @@ int hvc_poll(struct hvc_struct *hp) /* Now check if we can get data (are we throttled ?) */ if (tty_throttled(tty)) - goto throttled; + goto out; /* If we aren't notifier driven and aren't throttled, we always * request a reschedule @@ -627,56 +627,58 @@ int hvc_poll(struct hvc_struct *hp) poll_mask |= HVC_POLL_READ; /* Read data if any */ - for (;;) { - int count = tty_buffer_request_room(&hp->port, N_INBUF); - /* If flip is full, just reschedule a later read */ - if (count == 0) { + count = tty_buffer_request_room(&hp->port, N_INBUF); + + /* If flip is full, just reschedule a later read */ + if (count == 0) { + poll_mask |= HVC_POLL_READ; + goto out; + } + + n = hp->ops->get_chars(hp->vtermno, buf, count); + if (n <= 0) { + /* Hangup the tty when disconnected from host */ + if (n == -EPIPE) { + spin_unlock_irqrestore(&hp->lock, flags); + tty_hangup(tty); + spin_lock_irqsave(&hp->lock, flags); + } else if ( n == -EAGAIN ) { + /* + * Some back-ends can only ensure a certain min + * num of bytes read, which may be > 'count'. + * Let the tty clear the flip buff to make room. + */ poll_mask |= HVC_POLL_READ; - break; } + goto out; + } - n = hp->ops->get_chars(hp->vtermno, buf, count); - if (n <= 0) { - /* Hangup the tty when disconnected from host */ - if (n == -EPIPE) { - spin_unlock_irqrestore(&hp->lock, flags); - tty_hangup(tty); - spin_lock_irqsave(&hp->lock, flags); - } else if ( n == -EAGAIN ) { - /* - * Some back-ends can only ensure a certain min - * num of bytes read, which may be > 'count'. - * Let the tty clear the flip buff to make room. - */ - poll_mask |= HVC_POLL_READ; - } - break; - } - for (i = 0; i < n; ++i) { + for (i = 0; i < n; ++i) { #ifdef CONFIG_MAGIC_SYSRQ - if (hp->index == hvc_console.index) { - /* Handle the SysRq Hack */ - /* XXX should support a sequence */ - if (buf[i] == '\x0f') { /* ^O */ - /* if ^O is pressed again, reset - * sysrq_pressed and flip ^O char */ - sysrq_pressed = !sysrq_pressed; - if (sysrq_pressed) - continue; - } else if (sysrq_pressed) { - handle_sysrq(buf[i]); - sysrq_pressed = 0; + if (hp->index == hvc_console.index) { + /* Handle the SysRq Hack */ + /* XXX should support a sequence */ + if (buf[i] == '\x0f') { /* ^O */ + /* if ^O is pressed again, reset + * sysrq_pressed and flip ^O char */ + sysrq_pressed = !sysrq_pressed; + if (sysrq_pressed) continue; - } + } else if (sysrq_pressed) { + handle_sysrq(buf[i]); + sysrq_pressed = 0; + continue; } -#endif /* CONFIG_MAGIC_SYSRQ */ - tty_insert_flip_char(&hp->port, buf[i], 0); } - - read_total += n; +#endif /* CONFIG_MAGIC_SYSRQ */ + tty_insert_flip_char(&hp->port, buf[i], 0); } - throttled: + if (n == count) + poll_mask |= HVC_POLL_READ; + read_total = n; + + out: /* Wakeup write queue if necessary */ if (hp->do_wakeup) { hp->do_wakeup = 0; -- cgit From cfb5946b55f1dfd19e042feae1fbff6041e25a98 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:55 +1000 Subject: tty: hvc: hvc_poll() may sleep Introduce points where hvc_poll drops the lock, enables interrupts, and reschedules. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 745ac220fce8..2abfc0b15fbb 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -589,7 +589,7 @@ static u32 timeout = MIN_TIMEOUT; #define HVC_POLL_READ 0x00000001 #define HVC_POLL_WRITE 0x00000002 -int hvc_poll(struct hvc_struct *hp) +static int __hvc_poll(struct hvc_struct *hp, bool may_sleep) { struct tty_struct *tty; int i, n, count, poll_mask = 0; @@ -611,6 +611,12 @@ int hvc_poll(struct hvc_struct *hp) timeout = (written_total) ? 0 : MIN_TIMEOUT; } + if (may_sleep) { + spin_unlock_irqrestore(&hp->lock, flags); + cond_resched(); + spin_lock_irqsave(&hp->lock, flags); + } + /* No tty attached, just skip */ tty = tty_port_tty_get(&hp->port); if (tty == NULL) @@ -698,6 +704,11 @@ int hvc_poll(struct hvc_struct *hp) return poll_mask; } + +int hvc_poll(struct hvc_struct *hp) +{ + return __hvc_poll(hp, false); +} EXPORT_SYMBOL_GPL(hvc_poll); /** @@ -736,7 +747,8 @@ static int khvcd(void *unused) if (!cpus_are_in_xmon()) { mutex_lock(&hvc_structs_mutex); list_for_each_entry(hp, &hvc_structs, next) { - poll_mask |= hvc_poll(hp); + poll_mask |= __hvc_poll(hp, true); + cond_resched(); } mutex_unlock(&hvc_structs_mutex); } else -- cgit From 550ddadcc7580ec2a6c22d4ed04291bc6e2428fb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:56 +1000 Subject: tty: hvc: hvc_write() may sleep Rework the hvc_write loop to drop and re-take the spinlock on each iteration, add a cond_resched. Don't bother with an initial hvc_push initially, which makes the logic simpler -- just do a hvc_push on each time around the loop. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/tty/hvc/hvc_console.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 2abfc0b15fbb..6131d5084c42 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -493,23 +493,29 @@ static int hvc_write(struct tty_struct *tty, const unsigned char *buf, int count if (hp->port.count <= 0) return -EIO; - spin_lock_irqsave(&hp->lock, flags); + while (count > 0) { + spin_lock_irqsave(&hp->lock, flags); - /* Push pending writes */ - if (hp->n_outbuf > 0) - hvc_push(hp); - - while (count > 0 && (rsize = hp->outbuf_size - hp->n_outbuf) > 0) { - if (rsize > count) - rsize = count; - memcpy(hp->outbuf + hp->n_outbuf, buf, rsize); - count -= rsize; - buf += rsize; - hp->n_outbuf += rsize; - written += rsize; - hvc_push(hp); + rsize = hp->outbuf_size - hp->n_outbuf; + + if (rsize) { + if (rsize > count) + rsize = count; + memcpy(hp->outbuf + hp->n_outbuf, buf, rsize); + count -= rsize; + buf += rsize; + hp->n_outbuf += rsize; + written += rsize; + } + + if (hp->n_outbuf > 0) + hvc_push(hp); + + spin_unlock_irqrestore(&hp->lock, flags); + + if (count) + cond_resched(); } - spin_unlock_irqrestore(&hp->lock, flags); /* * Racy, but harmless, kick thread if there is still pending data. -- cgit From 9f65b81f36e31563c5a5e4df3b3b8bc7550b6030 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:57 +1000 Subject: tty: hvc: introduce the hv_ops.flush operation for hvc drivers Use .flush to wait for drivers to flush their console outside of the spinlock, to reduce lock/irq latencies. Flush the hvc console driver after each write, which can help messages make it out to the console after a crash. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/tty/hvc/hvc_console.c | 35 +++++++++++++++++++++++++++++++++-- drivers/tty/hvc/hvc_console.h | 1 + 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 6131d5084c42..5414c4a87bea 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -110,6 +110,29 @@ static struct hvc_struct *hvc_get_by_index(int index) return hp; } +static int __hvc_flush(const struct hv_ops *ops, uint32_t vtermno, bool wait) +{ + if (wait) + might_sleep(); + + if (ops->flush) + return ops->flush(vtermno, wait); + return 0; +} + +static int hvc_console_flush(const struct hv_ops *ops, uint32_t vtermno) +{ + return __hvc_flush(ops, vtermno, false); +} + +/* + * Wait for the console to flush before writing more to it. This sleeps. + */ +static int hvc_flush(struct hvc_struct *hp) +{ + return __hvc_flush(hp->ops, hp->vtermno, true); +} + /* * Initial console vtermnos for console API usage prior to full console * initialization. Any vty adapter outside this range will not have usable @@ -155,8 +178,12 @@ static void hvc_console_print(struct console *co, const char *b, if (r <= 0) { /* throw away characters on error * but spin in case of -EAGAIN */ - if (r != -EAGAIN) + if (r != -EAGAIN) { i = 0; + } else { + hvc_console_flush(cons_ops[index], + vtermnos[index]); + } } else if (r > 0) { i -= r; if (i > 0) @@ -164,6 +191,7 @@ static void hvc_console_print(struct console *co, const char *b, } } } + hvc_console_flush(cons_ops[index], vtermnos[index]); } static struct tty_driver *hvc_console_device(struct console *c, int *index) @@ -513,8 +541,11 @@ static int hvc_write(struct tty_struct *tty, const unsigned char *buf, int count spin_unlock_irqrestore(&hp->lock, flags); - if (count) + if (count) { + if (hp->n_outbuf > 0) + hvc_flush(hp); cond_resched(); + } } /* diff --git a/drivers/tty/hvc/hvc_console.h b/drivers/tty/hvc/hvc_console.h index ea63090e013f..e9319954c832 100644 --- a/drivers/tty/hvc/hvc_console.h +++ b/drivers/tty/hvc/hvc_console.h @@ -54,6 +54,7 @@ struct hvc_struct { struct hv_ops { int (*get_chars)(uint32_t vtermno, char *buf, int count); int (*put_chars)(uint32_t vtermno, const char *buf, int count); + int (*flush)(uint32_t vtermno, bool wait); /* Callbacks for notification. Called in open, close and hangup */ int (*notifier_add)(struct hvc_struct *hp, int irq); -- cgit From a57a04c76e06822e4377831611364c846b7202ca Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:02 -0700 Subject: powerpc/pkeys: Give all threads control of their key permissions Currently in a multithreaded application, a key allocated by one thread is not usable by other threads. By "not usable" we mean that other threads are unable to change the access permissions for that key for themselves. When a new key is allocated in one thread, the corresponding UAMOR bits for that thread get enabled, however the UAMOR bits for that key for all other threads remain disabled. Other threads have no way to set permissions on the key, and the current default permissions are that read/write is enabled for all keys, which means the key has no effect for other threads. Although that may be the desired behaviour in some circumstances, having all threads able to control their permissions for the key is more flexible. The current behaviour also differs from the x86 behaviour, which is problematic for users. To fix this, enable the UAMOR bits for all keys, at process creation (in start_thread(), ie exec time). Since the contents of UAMOR are inherited at fork, all threads are capable of modifying the permissions on any key. This is technically an ABI break on powerpc, but pkey support is fairly new on powerpc and not widely used, and this brings us into line with x86. Fixes: cf43d3b26452 ("powerpc: Enable pkey subsystem") Cc: stable@vger.kernel.org # v4.16+ Tested-by: Florian Weimer Signed-off-by: Ram Pai [mpe: Reword some of the changelog] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pkeys.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index e6f500fabf5e..6b55373f8c43 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -15,8 +15,9 @@ bool pkey_execute_disable_supported; int pkeys_total; /* Total pkeys as per device tree */ bool pkeys_devtree_defined; /* pkey property exported by device tree */ u32 initial_allocation_mask; /* Bits set for reserved keys */ -u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */ +u64 pkey_amr_mask; /* Bits in AMR not to be touched */ u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ +u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ #define AMR_BITS_PER_PKEY 2 #define AMR_RD_BIT 0x1UL @@ -119,20 +120,26 @@ int pkey_initialize(void) #else os_reserved = 0; #endif - initial_allocation_mask = ~0x0; - pkey_amr_uamor_mask = ~0x0ul; + initial_allocation_mask = (0x1 << 0) | (0x1 << 1); + + /* register mask is in BE format */ + pkey_amr_mask = ~0x0ul; pkey_iamr_mask = ~0x0ul; - /* - * key 0, 1 are reserved. - * key 0 is the default key, which allows read/write/execute. - * key 1 is recommended not to be used. PowerISA(3.0) page 1015, - * programming note. - */ - for (i = 2; i < (pkeys_total - os_reserved); i++) { - initial_allocation_mask &= ~(0x1 << i); - pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i)); + + for (i = 0; i < (pkeys_total - os_reserved); i++) { + pkey_amr_mask &= ~(0x3ul << pkeyshift(i)); pkey_iamr_mask &= ~(0x1ul << pkeyshift(i)); } + + pkey_uamor_mask = ~0x0ul; + pkey_uamor_mask &= ~(0x3ul << pkeyshift(0)); + + /* mark the rest of the keys as reserved and hence unavailable */ + for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) { + initial_allocation_mask |= (0x1 << i); + pkey_uamor_mask &= ~(0x3ul << pkeyshift(i)); + } + return 0; } @@ -289,9 +296,6 @@ void thread_pkey_regs_restore(struct thread_struct *new_thread, if (static_branch_likely(&pkey_disabled)) return; - /* - * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet. - */ if (old_thread->amr != new_thread->amr) write_amr(new_thread->amr); if (old_thread->iamr != new_thread->iamr) @@ -305,9 +309,13 @@ void thread_pkey_regs_init(struct thread_struct *thread) if (static_branch_likely(&pkey_disabled)) return; - thread->amr = read_amr() & pkey_amr_uamor_mask; - thread->iamr = read_iamr() & pkey_iamr_mask; - thread->uamor = read_uamor() & pkey_amr_uamor_mask; + thread->amr = pkey_amr_mask; + thread->iamr = pkey_iamr_mask; + thread->uamor = pkey_uamor_mask; + + write_uamor(pkey_uamor_mask); + write_amr(pkey_amr_mask); + write_iamr(pkey_iamr_mask); } static inline bool pkey_allows_readwrite(int pkey) -- cgit From de113256f8c1c24d8c79ae388bf2a5abd70f7577 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:03 -0700 Subject: powerpc/pkeys: Deny read/write/execute by default Deny all permissions on all keys, with some exceptions. pkey-0 must allow all permissions, or else everything comes to a screaching halt. Execute-only key must allow execute permission. Fixes: cf43d3b26452 ("powerpc: Enable pkey subsystem") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pkeys.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 6b55373f8c43..98180f49ac5e 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -124,12 +124,10 @@ int pkey_initialize(void) /* register mask is in BE format */ pkey_amr_mask = ~0x0ul; - pkey_iamr_mask = ~0x0ul; + pkey_amr_mask &= ~(0x3ul << pkeyshift(0)); - for (i = 0; i < (pkeys_total - os_reserved); i++) { - pkey_amr_mask &= ~(0x3ul << pkeyshift(i)); - pkey_iamr_mask &= ~(0x1ul << pkeyshift(i)); - } + pkey_iamr_mask = ~0x0ul; + pkey_iamr_mask &= ~(0x3ul << pkeyshift(0)); pkey_uamor_mask = ~0x0ul; pkey_uamor_mask &= ~(0x3ul << pkeyshift(0)); -- cgit From 4a4a5e5d2aadc793be95024f454cf511d115b62d Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:04 -0700 Subject: powerpc/pkeys: key allocation/deallocation must not change pkey registers Key allocation and deallocation has the side effect of programming the UAMOR/AMR/IAMR registers. This is wrong, since its the responsibility of the application and not that of the kernel, to modify the permission on the key. Do not modify the pkey registers at key allocation/deallocation. This patch also fixes a bug where a sys_pkey_free() resets the UAMOR bits of the key, thus making its permissions unmodifiable from user space. Later if the same key gets reallocated from a different thread this thread will no longer be able to change the permissions on the key. Fixes: cf43d3b26452 ("powerpc: Enable pkey subsystem") Cc: stable@vger.kernel.org # v4.16+ Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pkeys.h | 11 ----------- arch/powerpc/mm/pkeys.c | 27 --------------------------- 2 files changed, 38 deletions(-) diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 5ba80cffb505..3312606fda07 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -94,8 +94,6 @@ static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) __mm_pkey_is_allocated(mm, pkey)); } -extern void __arch_activate_pkey(int pkey); -extern void __arch_deactivate_pkey(int pkey); /* * Returns a positive, 5-bit key on success, or -1 on failure. * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and @@ -124,11 +122,6 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) ret = ffz((u32)mm_pkey_allocation_map(mm)); __mm_pkey_allocated(mm, ret); - /* - * Enable the key in the hardware - */ - if (ret > 0) - __arch_activate_pkey(ret); return ret; } @@ -140,10 +133,6 @@ static inline int mm_pkey_free(struct mm_struct *mm, int pkey) if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; - /* - * Disable the key in the hardware - */ - __arch_deactivate_pkey(pkey); __mm_pkey_free(mm, pkey); return 0; diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 98180f49ac5e..8f0052b15b84 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -218,33 +218,6 @@ static inline void init_iamr(int pkey, u8 init_bits) write_iamr(old_iamr | new_iamr_bits); } -static void pkey_status_change(int pkey, bool enable) -{ - u64 old_uamor; - - /* Reset the AMR and IAMR bits for this key */ - init_amr(pkey, 0x0); - init_iamr(pkey, 0x0); - - /* Enable/disable key */ - old_uamor = read_uamor(); - if (enable) - old_uamor |= (0x3ul << pkeyshift(pkey)); - else - old_uamor &= ~(0x3ul << pkeyshift(pkey)); - write_uamor(old_uamor); -} - -void __arch_activate_pkey(int pkey) -{ - pkey_status_change(pkey, true); -} - -void __arch_deactivate_pkey(int pkey) -{ - pkey_status_change(pkey, false); -} - /* * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that * specified in @init_val. -- cgit From c76662e825f507b98938dc3bb141c4505bd4968c Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:05 -0700 Subject: powerpc/pkeys: Save the pkey registers before fork When a thread forks the contents of AMR, IAMR, UAMOR registers in the newly forked thread are not inherited. Save the registers before forking, for content of those registers to be automatically copied into the new thread. Fixes: cf43d3b26452 ("powerpc: Enable pkey subsystem") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 27f0caee55ea..eca92aac2e17 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -583,6 +583,7 @@ static void save_all(struct task_struct *tsk) __giveup_spe(tsk); msr_check_and_clear(msr_all_available); + thread_pkey_regs_save(&tsk->thread); } void flush_all_to_thread(struct task_struct *tsk) -- cgit From fe6a2804e65969a574377bdb3605afb79e6091a9 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:06 -0700 Subject: powerpc/pkeys: Fix calculation of total pkeys. Total number of pkeys calculation is off by 1. Fix it. Fixes: 4fb158f65ac5 ("powerpc: track allocation status of all pkeys") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pkeys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 8f0052b15b84..04f65974e97b 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -92,7 +92,7 @@ int pkey_initialize(void) * arch-neutral code. */ pkeys_total = min_t(int, pkeys_total, - (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)); + ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1)); if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total) static_branch_enable(&pkey_disabled); -- cgit From a4fcc877d4e18b5efe26e93f08f0cfd4e278c7d9 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:07 -0700 Subject: powerpc/pkeys: Preallocate execute-only key execute-only key is allocated dynamically. This is a problem. When a thread implicitly creates an execute-only key, and resets the UAMOR for that key, the UAMOR value does not percolate to all the other threads. Any other thread may ignorantly change the permissions on the key. This can cause the key to be not execute-only for that thread. Preallocate the execute-only key and ensure that no thread can change the permission of the key, by resetting the corresponding bit in UAMOR. Fixes: 5586cf61e108 ("powerpc: introduce execute-only pkey") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pkeys.c | 63 ++++++++++++++----------------------------------- 1 file changed, 18 insertions(+), 45 deletions(-) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 04f65974e97b..0e7810ccd1ae 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -18,6 +18,7 @@ u32 initial_allocation_mask; /* Bits set for reserved keys */ u64 pkey_amr_mask; /* Bits in AMR not to be touched */ u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ +int execute_only_key = 2; #define AMR_BITS_PER_PKEY 2 #define AMR_RD_BIT 0x1UL @@ -120,7 +121,8 @@ int pkey_initialize(void) #else os_reserved = 0; #endif - initial_allocation_mask = (0x1 << 0) | (0x1 << 1); + initial_allocation_mask = (0x1 << 0) | (0x1 << 1) | + (0x1 << execute_only_key); /* register mask is in BE format */ pkey_amr_mask = ~0x0ul; @@ -128,9 +130,11 @@ int pkey_initialize(void) pkey_iamr_mask = ~0x0ul; pkey_iamr_mask &= ~(0x3ul << pkeyshift(0)); + pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key)); pkey_uamor_mask = ~0x0ul; pkey_uamor_mask &= ~(0x3ul << pkeyshift(0)); + pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key)); /* mark the rest of the keys as reserved and hence unavailable */ for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) { @@ -138,6 +142,17 @@ int pkey_initialize(void) pkey_uamor_mask &= ~(0x3ul << pkeyshift(i)); } + if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) { + /* + * Insufficient number of keys to support + * execute only key. Mark it unavailable. + * Any AMR, UAMOR, IAMR bit set for + * this key is irrelevant since this key + * can never be allocated. + */ + execute_only_key = -1; + } + return 0; } @@ -148,8 +163,7 @@ void pkey_mm_init(struct mm_struct *mm) if (static_branch_likely(&pkey_disabled)) return; mm_pkey_allocation_map(mm) = initial_allocation_mask; - /* -1 means unallocated or invalid */ - mm->context.execute_only_pkey = -1; + mm->context.execute_only_pkey = execute_only_key; } static inline u64 read_amr(void) @@ -301,48 +315,7 @@ static inline bool pkey_allows_readwrite(int pkey) int __execute_only_pkey(struct mm_struct *mm) { - bool need_to_set_mm_pkey = false; - int execute_only_pkey = mm->context.execute_only_pkey; - int ret; - - /* Do we need to assign a pkey for mm's execute-only maps? */ - if (execute_only_pkey == -1) { - /* Go allocate one to use, which might fail */ - execute_only_pkey = mm_pkey_alloc(mm); - if (execute_only_pkey < 0) - return -1; - need_to_set_mm_pkey = true; - } - - /* - * We do not want to go through the relatively costly dance to set AMR - * if we do not need to. Check it first and assume that if the - * execute-only pkey is readwrite-disabled than we do not have to set it - * ourselves. - */ - if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey)) - return execute_only_pkey; - - /* - * Set up AMR so that it denies access for everything other than - * execution. - */ - ret = __arch_set_user_pkey_access(current, execute_only_pkey, - PKEY_DISABLE_ACCESS | - PKEY_DISABLE_WRITE); - /* - * If the AMR-set operation failed somehow, just return 0 and - * effectively disable execute-only support. - */ - if (ret) { - mm_pkey_free(mm, execute_only_pkey); - return -1; - } - - /* We got one, store it and use it from here on out */ - if (need_to_set_mm_pkey) - mm->context.execute_only_pkey = execute_only_pkey; - return execute_only_pkey; + return mm->context.execute_only_pkey; } static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) -- cgit From 07f522d203242b359624a284b61977600eebfd5a Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:08 -0700 Subject: powerpc/pkeys: make protection key 0 less special Applications need the ability to associate an address-range with some key and latter revert to its initial default key. Pkey-0 comes close to providing this function but falls short, because the current implementation disallows applications to explicitly associate pkey-0 to the address range. Lets make pkey-0 less special and treat it almost like any other key. Thus it can be explicitly associated with any address range, and can be freed. This gives the application more flexibility and power. The ability to free pkey-0 must be used responsibily, since pkey-0 is associated with almost all address-range by default. Even with this change pkey-0 continues to be slightly more special from the following point of view. (a) it is implicitly allocated. (b) it is the default key assigned to any address-range. (c) its permissions cannot be modified by userspace. NOTE: (c) is specific to powerpc only. pkey-0 is associated by default with all pages including kernel pages, and pkeys are also active in kernel mode. If any permission is denied on pkey-0, the kernel running in the context of the application will be unable to operate. Tested on powerpc. Signed-off-by: Ram Pai [mpe: Drop #define PKEY_0 0 in favour of plain old 0] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pkeys.h | 27 +++++++++++++++++++++------ arch/powerpc/mm/pkeys.c | 13 ++++++------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 3312606fda07..20ebf153c871 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -13,7 +13,8 @@ DECLARE_STATIC_KEY_TRUE(pkey_disabled); extern int pkeys_total; /* total pkeys as per device tree */ -extern u32 initial_allocation_mask; /* bits set for reserved keys */ +extern u32 initial_allocation_mask; /* bits set for the initially allocated keys */ +extern u32 reserved_allocation_mask; /* bits set for reserved keys */ #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \ VM_PKEY_BIT3 | VM_PKEY_BIT4) @@ -83,15 +84,19 @@ static inline u16 pte_to_pkey_bits(u64 pteflags) #define __mm_pkey_is_allocated(mm, pkey) \ (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey)) -#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \ +#define __mm_pkey_is_reserved(pkey) (reserved_allocation_mask & \ pkey_alloc_mask(pkey)) static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { - /* A reserved key is never considered as 'explicitly allocated' */ - return ((pkey < arch_max_pkey()) && - !__mm_pkey_is_reserved(pkey) && - __mm_pkey_is_allocated(mm, pkey)); + if (pkey < 0 || pkey >= arch_max_pkey()) + return false; + + /* Reserved keys are never allocated. */ + if (__mm_pkey_is_reserved(pkey)) + return false; + + return __mm_pkey_is_allocated(mm, pkey); } /* @@ -176,6 +181,16 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, { if (static_branch_likely(&pkey_disabled)) return -EINVAL; + + /* + * userspace should not change pkey-0 permissions. + * pkey-0 is associated with every page in the kernel. + * If userspace denies any permission on pkey-0, the + * kernel cannot operate. + */ + if (pkey == 0) + return init_val ? -EINVAL : 0; + return __arch_set_user_pkey_access(tsk, pkey, init_val); } diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 0e7810ccd1ae..333b1f80c435 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -14,7 +14,8 @@ DEFINE_STATIC_KEY_TRUE(pkey_disabled); bool pkey_execute_disable_supported; int pkeys_total; /* Total pkeys as per device tree */ bool pkeys_devtree_defined; /* pkey property exported by device tree */ -u32 initial_allocation_mask; /* Bits set for reserved keys */ +u32 initial_allocation_mask; /* Bits set for the initially allocated keys */ +u32 reserved_allocation_mask; /* Bits set for reserved keys */ u64 pkey_amr_mask; /* Bits in AMR not to be touched */ u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ @@ -121,8 +122,8 @@ int pkey_initialize(void) #else os_reserved = 0; #endif - initial_allocation_mask = (0x1 << 0) | (0x1 << 1) | - (0x1 << execute_only_key); + /* Bits are in LE format. */ + reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key); /* register mask is in BE format */ pkey_amr_mask = ~0x0ul; @@ -138,9 +139,10 @@ int pkey_initialize(void) /* mark the rest of the keys as reserved and hence unavailable */ for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) { - initial_allocation_mask |= (0x1 << i); + reserved_allocation_mask |= (0x1 << i); pkey_uamor_mask &= ~(0x3ul << pkeyshift(i)); } + initial_allocation_mask = reserved_allocation_mask | (0x1 << 0); if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) { /* @@ -359,9 +361,6 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) int pkey_shift; u64 amr; - if (!pkey) - return true; - if (!is_pkey_enabled(pkey)) return true; -- cgit From 5db26e8903deb111b303bd6e0cf304215e0b4fd9 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:09 -0700 Subject: selftests/powerpc: Fix core-pkey for default execute permission change Only when the key is allocated, its permission are enabled. Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index 36bc312b1f5c..e23e2e199eb4 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -140,6 +140,10 @@ static int child(struct shared_info *info) if (disable_execute) info->iamr |= 1ul << pkeyshift(pkey1); + else + info->iamr &= ~(1ul << pkeyshift(pkey1)); + + info->iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3)); info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2); -- cgit From 29e8131cd77ae49750e7e2f9f596afa5812435f2 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Tue, 17 Jul 2018 06:51:10 -0700 Subject: selftests/powerpc: Fix ptrace-pkey for default execute permission change The test case assumes execute-permissions of unallocated keys are enabled by default, which is incorrect. Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index 5cf631f792cc..bdbbbe8431e0 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -104,6 +104,10 @@ static int child(struct shared_info *info) if (disable_execute) info->expected_iamr |= 1ul << pkeyshift(pkey1); + else + info->expected_iamr &= ~(1ul << pkeyshift(pkey1)); + + info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3)); info->expected_uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2); -- cgit From 9b81c0211c249c1bc8caec2ddbc86e36c550ce0f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 3 Jun 2018 22:24:32 +1000 Subject: powerpc/64s: make PACA_IRQ_HARD_DIS track MSR[EE] closely When the masked interrupt handler clears MSR[EE] for an interrupt in the PACA_IRQ_MUST_HARD_MASK set, it does not set PACA_IRQ_HARD_DIS. This makes them get out of synch. With that taken into account, it's only low level irq manipulation (and interrupt entry before reconcile) where they can be out of synch. This makes the code less surprising. It also allows the IRQ replay code to rely on the IRQ_HARD_DIS value and not have to mtmsrd again in this case (e.g., for an external interrupt that has been masked). The bigger benefit might just be that there is not such an element of surprise in these two bits of state. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 10 ++++++---- arch/powerpc/kernel/entry_64.S | 8 ++++++++ arch/powerpc/kernel/exceptions-64e.S | 4 ++++ arch/powerpc/kernel/exceptions-64s.S | 5 ++++- arch/powerpc/kernel/idle_book3e.S | 7 ++++++- arch/powerpc/kernel/irq.c | 34 ++++++++++++++++++++++++---------- 6 files changed, 52 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e151774cb577..32a18f2f49bc 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -253,14 +253,16 @@ static inline bool lazy_irq_pending(void) /* * This is called by asynchronous interrupts to conditionally - * re-enable hard interrupts when soft-disabled after having - * cleared the source of the interrupt + * re-enable hard interrupts after having cleared the source + * of the interrupt. They are kept disabled if there is a different + * soft-masked interrupt pending that requires hard masking. */ static inline void may_hard_irq_enable(void) { - get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; - if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) + if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) { + get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; __hard_irq_enable(); + } } static inline bool arch_irq_disabled_regs(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 729e9ef4d3bb..0357f87a013c 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -992,6 +992,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) or r4,r4,r3 std r4,_TRAP(r1) + /* + * PACA_IRQ_HARD_DIS won't always be set here, so set it now + * to reconcile the IRQ state. Tracing is already accounted for. + */ + lbz r4,PACAIRQHAPPENED(r13) + ori r4,r4,PACA_IRQ_HARD_DIS + stb r4,PACAIRQHAPPENED(r13) + /* * Then find the right handler and call it. Interrupts are * still soft-disabled and we keep them that way. diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 9b6e653e501a..3325f721e7b2 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -949,7 +949,11 @@ kernel_dbg_exc: .macro masked_interrupt_book3e paca_irq full_mask lbz r10,PACAIRQHAPPENED(r13) + .if \full_mask == 1 + ori r10,r10,\paca_irq | PACA_IRQ_HARD_DIS + .else ori r10,r10,\paca_irq + .endif stb r10,PACAIRQHAPPENED(r13) .if \full_mask == 1 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 76a14702cb9c..36aec1f1cb2d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1498,7 +1498,10 @@ masked_##_H##interrupt: \ mfspr r10,SPRN_##_H##SRR1; \ xori r10,r10,MSR_EE; /* clear MSR_EE */ \ mtspr SPRN_##_H##SRR1,r10; \ -2: mtcrf 0x80,r9; \ + ori r11,r11,PACA_IRQ_HARD_DIS; \ + stb r11,PACAIRQHAPPENED(r13); \ +2: /* done */ \ + mtcrf 0x80,r9; \ std r1,PACAR1(r13); \ ld r9,PACA_EXGEN+EX_R9(r13); \ ld r10,PACA_EXGEN+EX_R10(r13); \ diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 2b269315d377..4e0d94d02030 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -36,7 +36,7 @@ _GLOBAL(\name) */ lbz r3,PACAIRQHAPPENED(r13) cmpwi cr0,r3,0 - bnelr + bne 2f /* Now we are going to mark ourselves as soft and hard enabled in * order to be able to take interrupts while asleep. We inform lockdep @@ -72,6 +72,11 @@ _GLOBAL(\name) wrteei 1 \loop +2: + lbz r10,PACAIRQHAPPENED(r13) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) + blr .endm .macro BOOK3E_IDLE_LOOP diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0682fef1f385..ca941f1e83a9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -145,8 +145,20 @@ notrace unsigned int __check_irq_replay(void) trace_hardirqs_on(); trace_hardirqs_off(); + /* + * We are always hard disabled here, but PACA_IRQ_HARD_DIS may + * not be set, which means interrupts have only just been hard + * disabled as part of the local_irq_restore or interrupt return + * code. In that case, skip the decrementr check becaus it's + * expensive to read the TB. + * + * HARD_DIS then gets cleared here, but it's reconciled later. + * Either local_irq_disable will replay the interrupt and that + * will reconcile state like other hard interrupts. Or interrupt + * retur will replay the interrupt and in that case it sets + * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S). + */ if (happened & PACA_IRQ_HARD_DIS) { - /* Clear bit 0 which we wouldn't clear otherwise */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; /* @@ -248,24 +260,26 @@ notrace void arch_local_irq_restore(unsigned long mask) * cannot have preempted. */ irq_happened = get_irq_happened(); - if (!irq_happened) + if (!irq_happened) { +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(!(mfmsr() & MSR_EE)); +#endif return; + } /* * We need to hard disable to get a trusted value from * __check_irq_replay(). We also need to soft-disable * again to avoid warnings in there due to the use of * per-cpu variables. - * - * We know that if the value in irq_happened is exactly 0x01 - * then we are already hard disabled (there are other less - * common cases that we'll ignore for now), so we skip the - * (expensive) mtmsrd. */ - if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) + if (!(irq_happened & PACA_IRQ_HARD_DIS)) { +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(!(mfmsr() & MSR_EE)); +#endif __hard_irq_disable(); #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - else { + } else { /* * We should already be hard disabled here. We had bugs * where that wasn't the case so let's dbl check it and @@ -274,8 +288,8 @@ notrace void arch_local_irq_restore(unsigned long mask) */ if (WARN_ON(mfmsr() & MSR_EE)) __hard_irq_disable(); - } #endif + } irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); -- cgit From 5b73151fff63fb019db8171cb81c6c978533844b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 Apr 2018 15:17:59 +1000 Subject: powerpc: NMI IPI make NMI IPIs fully sychronous There is an asynchronous aspect to smp_send_nmi_ipi. The caller waits for all CPUs to call in to the handler, but it does not wait for completion of the handler. This is a needless complication, so remove it and always wait synchronously. The synchronous wait allows the caller to easily time out and clear the wait for completion (zero nmi_ipi_busy_count) in the case of badly behaved handlers. This would have prevented the recent smp_send_stop NMI IPI bug from causing the system to hang. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/smp.h | 1 - arch/powerpc/kernel/smp.c | 50 +++++++++++++++++++++++------------------- arch/powerpc/kernel/watchdog.c | 1 - 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 29ffaabdf75b..95b66a0c639b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -56,7 +56,6 @@ struct smp_ops_t { int (*cpu_bootable)(unsigned int nr); }; -extern void smp_flush_nmi_ipi(u64 delay_us); extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern void smp_send_debugger_break(void); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4794d6b4f4d2..b19d832ef386 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -423,7 +423,8 @@ int smp_handle_nmi_ipi(struct pt_regs *regs) fn(regs); nmi_ipi_lock(); - nmi_ipi_busy_count--; + if (nmi_ipi_busy_count > 1) /* Can race with caller time-out */ + nmi_ipi_busy_count--; out: nmi_ipi_unlock_end(&flags); @@ -448,29 +449,11 @@ static void do_smp_send_nmi_ipi(int cpu, bool safe) } } -void smp_flush_nmi_ipi(u64 delay_us) -{ - unsigned long flags; - - nmi_ipi_lock_start(&flags); - while (nmi_ipi_busy_count) { - nmi_ipi_unlock_end(&flags); - udelay(1); - if (delay_us) { - delay_us--; - if (!delay_us) - return; - } - nmi_ipi_lock_start(&flags); - } - nmi_ipi_unlock_end(&flags); -} - /* * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. * - fn is the target callback function. * - delay_us > 0 is the delay before giving up waiting for targets to - * enter the handler, == 0 specifies indefinite delay. + * complete executing the handler, == 0 specifies indefinite delay. */ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe) { @@ -507,8 +490,23 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool do_smp_send_nmi_ipi(cpu, safe); + nmi_ipi_lock(); + /* nmi_ipi_busy_count is held here, so unlock/lock is okay */ while (!cpumask_empty(&nmi_ipi_pending_mask)) { + nmi_ipi_unlock(); udelay(1); + nmi_ipi_lock(); + if (delay_us) { + delay_us--; + if (!delay_us) + break; + } + } + + while (nmi_ipi_busy_count > 1) { + nmi_ipi_unlock(); + udelay(1); + nmi_ipi_lock(); if (delay_us) { delay_us--; if (!delay_us) @@ -516,12 +514,17 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool } } - nmi_ipi_lock(); if (!cpumask_empty(&nmi_ipi_pending_mask)) { - /* Could not gather all CPUs */ + /* Timeout waiting for CPUs to call smp_handle_nmi_ipi */ ret = 0; cpumask_clear(&nmi_ipi_pending_mask); } + if (nmi_ipi_busy_count > 1) { + /* Timeout waiting for CPUs to execute fn */ + ret = 0; + nmi_ipi_busy_count = 1; + } + nmi_ipi_busy_count--; nmi_ipi_unlock_end(&flags); @@ -597,7 +600,8 @@ static void nmi_stop_this_cpu(struct pt_regs *regs) * IRQs are already hard disabled by the smp_handle_nmi_ipi. */ nmi_ipi_lock(); - nmi_ipi_busy_count--; + if (nmi_ipi_busy_count > 1) + nmi_ipi_busy_count--; nmi_ipi_unlock(); spin_begin(); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 1d82274f7e9f..3c6ab22a0c4e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -174,7 +174,6 @@ static void watchdog_smp_panic(int cpu, u64 tb) continue; smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); } - smp_flush_nmi_ipi(1000000); } /* Take the stuck CPUs out of the watch group */ -- cgit From 6d44acae1937b81cf8115ada8958e04f601f3f2e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 9 Jul 2018 16:25:21 +1000 Subject: powerpc64s: Show ori31 availability in spectre_v1 sysfs file not v2 When I added the spectre_v2 information in sysfs, I included the availability of the ori31 speculation barrier. Although the ori31 barrier can be used to mitigate v2, it's primarily intended as a spectre v1 mitigation. Spectre v2 is mitigated by hardware changes. So rework the sysfs files to show the ori31 information in the spectre_v1 file, rather than v2. Currently we display eg: $ grep . spectre_v* spectre_v1:Mitigation: __user pointer sanitization spectre_v2:Mitigation: Indirect branch cache disabled, ori31 speculation barrier enabled After: $ grep . spectre_v* spectre_v1:Mitigation: __user pointer sanitization, ori31 speculation barrier enabled spectre_v2:Mitigation: Indirect branch cache disabled Fixes: d6fbe1c55c55 ("powerpc/64s: Wire up cpu_show_spectre_v2()") Cc: stable@vger.kernel.org # v4.17+ Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/security.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index a8b277362931..4cb8f1f7b593 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -117,25 +117,35 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) - return sprintf(buf, "Not affected\n"); + struct seq_buf s; + + seq_buf_init(&s, buf, PAGE_SIZE - 1); - if (barrier_nospec_enabled) - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + if (security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) { + if (barrier_nospec_enabled) + seq_buf_printf(&s, "Mitigation: __user pointer sanitization"); + else + seq_buf_printf(&s, "Vulnerable"); - return sprintf(buf, "Vulnerable\n"); + if (security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31)) + seq_buf_printf(&s, ", ori31 speculation barrier enabled"); + + seq_buf_printf(&s, "\n"); + } else + seq_buf_printf(&s, "Not affected\n"); + + return s.len; } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - bool bcs, ccd, ori; struct seq_buf s; + bool bcs, ccd; seq_buf_init(&s, buf, PAGE_SIZE - 1); bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); - ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31); if (bcs || ccd) { seq_buf_printf(&s, "Mitigation: "); @@ -151,9 +161,6 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c } else seq_buf_printf(&s, "Vulnerable"); - if (ori) - seq_buf_printf(&s, ", ori31 speculation barrier enabled"); - seq_buf_printf(&s, "\n"); return s.len; -- cgit From 8db0c9d416f26018cb7cabfb0b144f5108e29735 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 10 Jul 2018 00:24:25 +1000 Subject: powerpc: Add ppc32_allmodconfig defconfig target Because the allmodconfig logic just sets every symbol to M or Y, it has the effect of always generating a 64-bit config, because CONFIG_PPC64 becomes Y. So to make it easier for folks to test 32-bit code, provide a phony defconfig target that generates a 32-bit allmodconfig. The 32-bit port has several mutually exclusive CPU types, we choose the Book3S variants as that's what the help text in Kconfig says is most common. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 5 +++++ arch/powerpc/configs/book3s_32.config | 2 ++ 2 files changed, 7 insertions(+) create mode 100644 arch/powerpc/configs/book3s_32.config diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 2ea575cb3401..2556c2182789 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -354,6 +354,11 @@ mpc86xx_smp_defconfig: $(call merge_into_defconfig,mpc86xx_basic_defconfig,\ 86xx-smp 86xx-hw fsl-emb-nonhw) +PHONY += ppc32_allmodconfig +ppc32_allmodconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \ + -f $(srctree)/Makefile allmodconfig + define archhelp @echo '* zImage - Build default images selected by kernel config' @echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' diff --git a/arch/powerpc/configs/book3s_32.config b/arch/powerpc/configs/book3s_32.config new file mode 100644 index 000000000000..8721eb7b1294 --- /dev/null +++ b/arch/powerpc/configs/book3s_32.config @@ -0,0 +1,2 @@ +CONFIG_PPC64=n +CONFIG_PPC_BOOK3S_32=y -- cgit From 64de5d8d0457c325e2226c9c7efb37b1f772076c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 10 Jul 2018 00:24:26 +1000 Subject: powerpc: Add ppc64le and ppc64_book3e allmodconfig targets Similarly as we just did for 32-bit, add phony targets for generating a little endian and Book3E allmodconfig. These aren't covered by the regular allmodconfig, which is big endian and Book3S due to the way the Kconfig symbols are structured. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 2556c2182789..48e887f03a6c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -359,6 +359,16 @@ ppc32_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \ -f $(srctree)/Makefile allmodconfig +PHONY += ppc64le_allmodconfig +ppc64le_allmodconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \ + -f $(srctree)/Makefile allmodconfig + +PHONY += ppc64_book3e_allmodconfig +ppc64_book3e_allmodconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \ + -f $(srctree)/Makefile allmodconfig + define archhelp @echo '* zImage - Build default images selected by kernel config' @echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' -- cgit From 6aba0c84ec474534bbae3675e95464958a6c5713 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 21 Jun 2018 14:01:57 +0530 Subject: powerpc/mm: Check memblock_add against MAX_PHYSMEM_BITS range With SPARSEMEM config enabled, we make sure that we don't add sections beyond MAX_PHYSMEM_BITS range. This results in not building vmemmap mapping for range beyond max range. But our memblock layer looks the device tree and create mapping for the full memory range. Prevent this by checking against MAX_PHSYSMEM_BITS when doing memblock_add. We don't do similar check for memeblock_reserve_range. If reserve range is beyond MAX_PHYSMEM_BITS we expect that to be configured with 'nomap'. Any other reserved range should come from existing memblock ranges which we already filtered while adding. This avoids crash as below when running on a system with system ram config above MAX_PHSYSMEM_BITS Unable to handle kernel paging request for data at address 0xc00a001000000440 Faulting instruction address: 0xc000000001034118 cpu 0x0: Vector: 300 (Data Access) at [c00000000124fb30] pc: c000000001034118: __free_pages_bootmem+0xc0/0x1c0 lr: c00000000103b258: free_all_bootmem+0x19c/0x22c sp: c00000000124fdb0 msr: 9000000002001033 dar: c00a001000000440 dsisr: 40000000 current = 0xc00000000120dd00 paca = 0xc000000001f60000^I irqmask: 0x03^I irq_happened: 0x01 pid = 0, comm = swapper [c00000000124fe20] c00000000103b258 free_all_bootmem+0x19c/0x22c [c00000000124fee0] c000000001010a68 mem_init+0x3c/0x5c [c00000000124ff00] c00000000100401c start_kernel+0x298/0x5e4 [c00000000124ff90] c00000000000b57c start_here_common+0x1c/0x520 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 05e7fb47a7a4..8f32f14ba508 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -440,6 +440,29 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, return 1; } +/* + * Compare the range against max mem limit and update + * size if it cross the limit. + */ + +#ifdef CONFIG_SPARSEMEM +static bool validate_mem_limit(u64 base, u64 *size) +{ + u64 max_mem = 1UL << (MAX_PHYSMEM_BITS); + + if (base >= max_mem) + return false; + if ((base + *size) > max_mem) + *size = max_mem - base; + return true; +} +#else +static bool validate_mem_limit(u64 base, u64 *size) +{ + return true; +} +#endif + #ifdef CONFIG_PPC_PSERIES /* * Interpret the ibm dynamic reconfiguration memory LMBs. @@ -494,7 +517,8 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, } DBG("Adding: %llx -> %llx\n", base, size); - memblock_add(base, size); + if (validate_mem_limit(base, &size)) + memblock_add(base, size); } while (--rngs); } #endif /* CONFIG_PPC_PSERIES */ @@ -548,8 +572,10 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } /* Add the chunk to the MEMBLOCK list */ - if (add_mem_to_memblock) - memblock_add(base, size); + if (add_mem_to_memblock) { + if (validate_mem_limit(base, &size)) + memblock_add(base, size); + } } static void __init early_reserve_mem_dt(void) -- cgit From 7d4340bb92a9df78e6e28152f3dd89d9bd82146b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 21 Jun 2018 14:01:58 +0530 Subject: powerpc/mm: Increase MAX_PHYSMEM_BITS to 128TB with SPARSEMEM_VMEMMAP config We do this only with VMEMMAP config so that our page_to_[nid/section] etc are not impacted. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sparsemem.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index bc66712bdc3c..28f5dae25db6 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -6,13 +6,20 @@ #ifdef CONFIG_SPARSEMEM /* * SECTION_SIZE_BITS 2^N: how big each section will be - * MAX_PHYSADDR_BITS 2^N: how much physical address space we have * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space */ #define SECTION_SIZE_BITS 24 - -#define MAX_PHYSADDR_BITS 46 +/* + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. + */ +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define MAX_PHYSMEM_BITS 47 +#else #define MAX_PHYSMEM_BITS 46 +#endif #endif /* CONFIG_SPARSEMEM */ -- cgit From 1531cff44b5bb30c899404c044805ec60b2f3620 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Jun 2018 14:06:29 +0530 Subject: powerpc/mm/hash: Remove the superfluous bitwise operation when find hpte group When computing the starting slot number for a hash page table group we used to do this hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; Multiplying with 8 (HPTES_PER_GROUP) imply the last three bits are 0. Hence we really don't need to clear then separately. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/dump_hashpagetable.c | 2 +- arch/powerpc/mm/hash64_4k.c | 8 ++++---- arch/powerpc/mm/hash64_64k.c | 15 +++++++-------- arch/powerpc/mm/hash_utils_64.c | 10 ++++------ arch/powerpc/mm/hugepage-hash64.c | 9 ++++----- 5 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index 14cfb11b09d0..d241cb6518da 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -260,7 +260,7 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 * /* to check in the secondary hash table, we invert the hash */ if (!primary) hash = ~hash; - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* see if we can find an entry in the hpte with this hash */ for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index d573d7d07f25..6fa6765a10eb 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -80,7 +80,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, hash = hpt_hash(vpn, shift, ssize); repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -89,7 +89,7 @@ repeat: * Primary is full, try the secondary */ if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, @@ -97,8 +97,8 @@ repeat: MMU_PAGE_4K, ssize); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * + HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index e601d95c3b20..3afa253d7f52 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -154,7 +154,7 @@ htab_insert_hpte: } hash = hpt_hash(vpn, shift, ssize); repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -165,7 +165,7 @@ repeat: if (unlikely(slot == -1)) { bool soft_invalid; - hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, MMU_PAGE_4K, MMU_PAGE_4K, @@ -193,8 +193,7 @@ repeat: * that we do not get the same soft-invalid slot. */ if (soft_invalid || (mftb() & 0x1)) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); /* @@ -288,7 +287,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, hash = hpt_hash(vpn, shift, ssize); repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -298,7 +297,7 @@ repeat: * Primary is full, try the secondary */ if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, @@ -306,8 +305,8 @@ repeat: MMU_PAGE_64K, ssize); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * + HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5a72e980e25a..743c8dbe1941 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1753,8 +1753,7 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn, long slot; repeat: - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags, @@ -1762,15 +1761,14 @@ repeat: /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags | HPTE_V_SECONDARY, psize, psize, ssize); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP)&~0x7UL; + hpte_group = (hash & htab_hash_mask) * + HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); goto repeat; diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index f20d16f849c5..01f213d2bcb9 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -128,7 +128,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, new_pmd |= H_PAGE_HASHPTE; repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; /* Insert into the hash table, primary slot */ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -137,16 +137,15 @@ repeat: * Primary is full, try the secondary */ if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, psize, lpsize, ssize); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; + hpte_group = (hash & htab_hash_mask) * + HPTES_PER_GROUP; mmu_hash_ops.hpte_remove(hpte_group); goto repeat; -- cgit From a833280b4aeeb3152369a3d5bcdc09447fea8358 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Jun 2018 14:06:30 +0530 Subject: powerpc/mm/hash: Add hpte_get_old_v and use that instead of opencoding No functional change Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 10 ++++++++++ arch/powerpc/mm/hash_native_64.c | 27 +++++++-------------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 50ed64fba4ae..eee0b5b8a23f 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -364,6 +364,16 @@ static inline unsigned long hpte_new_to_old_r(unsigned long r) return r & ~HPTE_R_3_0_SSIZE_MASK; } +static inline unsigned long hpte_get_old_v(struct hash_pte *hptep) +{ + unsigned long hpte_v; + + hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + return hpte_v; +} + /* * This function sets the AVPN and L fields of the HPTE appropriately * using the base page size and actual page size. diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 1d049c78c82a..68e6eaf41bb9 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -423,9 +423,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -439,9 +437,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, } else { native_lock_hpte(hptep); /* recheck with locks held */ - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))) { ret = -1; @@ -481,11 +477,9 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) /* Bolted mappings are only ever in the primary group */ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; for (i = 0; i < HPTES_PER_GROUP; i++) { - hptep = htab_address + slot; - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hptep = htab_address + slot; + hpte_v = hpte_get_old_v(hptep); if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) /* HPTE matches */ return slot; @@ -575,9 +569,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, want_v = hpte_encode_avpn(vpn, bpsize, ssize); native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -635,9 +627,7 @@ static void native_hugepage_invalidate(unsigned long vsid, hptep = htab_address + slot; want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) @@ -813,10 +803,7 @@ static void native_flush_hash_range(unsigned long number, int local) hptep = htab_address + slot; want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); - if (cpu_has_feature(CPU_FTR_ARCH_300)) - hpte_v = hpte_new_to_old_v(hpte_v, - be64_to_cpu(hptep->r)); + hpte_v = hpte_get_old_v(hptep); if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); -- cgit From 27d8959da712771e2038ec77318fe8b2b8591e28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Jun 2018 14:06:31 +0530 Subject: powerpc/mm/hash: Reduce contention on hpte lock We do this in some part. This patch make sure we always try to search for hpte without holding lock and redo the compare with lock held once match found. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_native_64.c | 49 +++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 68e6eaf41bb9..ffbd5ed4e8de 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -568,9 +568,19 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); want_v = hpte_encode_avpn(vpn, bpsize, ssize); - native_lock_hpte(hptep); hpte_v = hpte_get_old_v(hptep); + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { + native_lock_hpte(hptep); + /* recheck with locks held */ + hpte_v = hpte_get_old_v(hptep); + + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + else + native_unlock_hpte(hptep); + } /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -578,13 +588,6 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, * (hpte_remove) because we assume the old translation is still * technically "valid". */ - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) - native_unlock_hpte(hptep); - else - /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->v = 0; - - /* Invalidate the TLB */ tlbie(vpn, bpsize, apsize, ssize, local); local_irq_restore(flags); @@ -626,15 +629,23 @@ static void native_hugepage_invalidate(unsigned long vsid, hptep = htab_address + slot; want_v = hpte_encode_avpn(vpn, psize, ssize); - native_lock_hpte(hptep); hpte_v = hpte_get_old_v(hptep); /* Even if we miss, we need to invalidate the TLB */ - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) - native_unlock_hpte(hptep); - else - /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->v = 0; + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { + /* recheck with locks held */ + native_lock_hpte(hptep); + hpte_v = hpte_get_old_v(hptep); + + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { + /* + * Invalidate the hpte. NOTE: this also unlocks it + */ + + hptep->v = 0; + } else + native_unlock_hpte(hptep); + } /* * We need to do tlb invalidate for all the address, tlbie * instruction compares entry_VA in tlb with the VA specified @@ -802,13 +813,19 @@ static void native_flush_hash_range(unsigned long number, int local) slot += hidx & _PTEIDX_GROUP_IX; hptep = htab_address + slot; want_v = hpte_encode_avpn(vpn, psize, ssize); + hpte_v = hpte_get_old_v(hptep); + + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) + continue; + /* lock and try again */ native_lock_hpte(hptep); hpte_v = hpte_get_old_v(hptep); - if (!HPTE_V_COMPARE(hpte_v, want_v) || - !(hpte_v & HPTE_V_VALID)) + + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); else hptep->v = 0; + } pte_iterate_hashed_end(); } -- cgit From 65471d763e9178a8dc6f05db1c9c09c292f75c28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Jun 2018 14:09:04 +0530 Subject: powerpc/pseries: Use pr_xxx() in lpar.c Switch from printk to pr_fmt() / pr_xxx(). Signed-off-by: Aneesh Kumar K.V [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/lpar.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 5a392e40f3d2..ed7e95f1dc0c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -21,6 +21,7 @@ /* Enables debugging of low-level hash table routines - careful! */ #undef DEBUG +#define pr_fmt(fmt) "lpar: " fmt #include #include @@ -612,8 +613,8 @@ static int __init disable_bulk_remove(char *str) { if (strcmp(str, "off") == 0 && firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { - printk(KERN_INFO "Disabling BULK_REMOVE firmware feature"); - powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; + pr_info("Disabling BULK_REMOVE firmware feature"); + powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; } return 1; } @@ -659,8 +660,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE)) return -ENODEV; - printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n", - shift); + pr_info("Attempting to resize HPT to shift %lu\n", shift); t0 = ktime_get(); @@ -672,8 +672,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) /* prepare with shift==0 cancels an in-progress resize */ rc = plpar_resize_hpt_prepare(0, 0); if (rc != H_SUCCESS) - printk(KERN_WARNING - "lpar: Unexpected error %d cancelling timed out HPT resize\n", + pr_warn("Unexpected error %d cancelling timed out HPT resize\n", rc); return -ETIMEDOUT; } @@ -691,9 +690,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) case H_RESOURCE: return -EPERM; default: - printk(KERN_WARNING - "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n", - rc); + pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); return -EIO; } @@ -706,22 +703,19 @@ static int pseries_lpar_resize_hpt(unsigned long shift) if (rc != 0) { switch (state.commit_rc) { case H_PTEG_FULL: - printk(KERN_WARNING - "lpar: Hash collision while resizing HPT\n"); + pr_warn("Hash collision while resizing HPT\n"); return -ENOSPC; default: - printk(KERN_WARNING - "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n", - state.commit_rc); + pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n", + state.commit_rc); return -EIO; }; } - printk(KERN_INFO - "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n", - shift, (long long) ktime_ms_delta(t1, t0), - (long long) ktime_ms_delta(t2, t1)); + pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n", + shift, (long long) ktime_ms_delta(t1, t0), + (long long) ktime_ms_delta(t2, t1)); return 0; } @@ -785,13 +779,13 @@ static int __init cmo_free_hint(char *str) parm = strstrip(str); if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) { - printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n"); + pr_info("%s: CMO free page hinting is not active.\n", __func__); cmo_free_hint_flag = 0; return 1; } cmo_free_hint_flag = 1; - printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n"); + pr_info("%s: CMO free page hinting is active.\n", __func__); if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0) return 1; -- cgit From ca42d8d2d6c55822fa8f1d230ffa3b78824fb60c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Jun 2018 14:09:04 +0530 Subject: powerpc/pseries/mm: Improve error reporting on HCALL failures This patch adds error reporting to H_ENTER and H_READ hcalls. A failure for both these hcalls are mostly fatal and it would be good to log the failure reason. Signed-off-by: Aneesh Kumar K.V [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/lpar.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index ed7e95f1dc0c..52eeff1297f4 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -166,8 +166,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); if (unlikely(lpar_rc == H_PTEG_FULL)) { - if (!(vflags & HPTE_V_BOLTED)) - pr_devel(" full\n"); + pr_devel("Hash table group is full\n"); return -1; } @@ -177,8 +176,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, * or we will loop forever, so return -2 in this case. */ if (unlikely(lpar_rc != H_SUCCESS)) { - if (!(vflags & HPTE_V_BOLTED)) - pr_devel(" lpar err %ld\n", lpar_rc); + pr_err("Failed hash pte insert with error %ld\n", lpar_rc); return -2; } if (!(vflags & HPTE_V_BOLTED)) @@ -241,8 +239,11 @@ static void manual_hpte_clear_all(void) */ for (i = 0; i < hpte_count; i += 4) { lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); - if (lpar_rc != H_SUCCESS) + if (lpar_rc != H_SUCCESS) { + pr_info("Failed to read hash page table at %ld err %ld\n", + i, lpar_rc); continue; + } for (j = 0; j < 4; j++){ if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == HPTE_V_VRMA_MASK) @@ -341,8 +342,11 @@ static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_gr for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); - if (lpar_rc != H_SUCCESS) + if (lpar_rc != H_SUCCESS) { + pr_info("Failed to read hash page table at %ld err %ld\n", + hpte_group, lpar_rc); continue; + } for (j = 0; j < 4; j++) { if (HPTE_V_COMPARE(ptes[j].pteh, want_v) && -- cgit From 2d9ee327adce5f6becea2dd51d282a6183e40b0f Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Thu, 7 Jun 2018 09:57:51 +0800 Subject: powerpc/64: Align bytes before fall back to .Lshort in powerpc64 memcmp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently memcmp() 64bytes version in powerpc will fall back to .Lshort (compare per byte mode) if either src or dst address is not 8 bytes aligned. It can be opmitized in 2 situations: 1) if both addresses are with the same offset with 8 bytes boundary: memcmp() can compare the unaligned bytes within 8 bytes boundary firstly and then compare the rest 8-bytes-aligned content with .Llong mode. 2) If src/dst addrs are not with the same offset of 8 bytes boundary: memcmp() can align src addr with 8 bytes, increment dst addr accordingly, then load src with aligned mode and load dst with unaligned mode. This patch optmizes memcmp() behavior in the above 2 situations. Tested with both little/big endian. Performance result below is based on little endian. Following is the test result with src/dst having the same offset case: (a similar result was observed when src/dst having different offset): (1) 256 bytes Test with the existing tools/testing/selftests/powerpc/stringloops/memcmp: - without patch 29.773018302 seconds time elapsed ( +- 0.09% ) - with patch 16.485568173 seconds time elapsed ( +- 0.02% ) -> There is ~+80% percent improvement (2) 32 bytes To observe performance impact on < 32 bytes, modify tools/testing/selftests/powerpc/stringloops/memcmp.c with following: ------- #include #include "utils.h" -#define SIZE 256 +#define SIZE 32 #define ITERATIONS 10000 int test_memcmp(const void *s1, const void *s2, size_t n); -------- - Without patch 0.244746482 seconds time elapsed ( +- 0.36%) - with patch 0.215069477 seconds time elapsed ( +- 0.51%) -> There is ~+13% improvement (3) 0~8 bytes To observe <8 bytes performance impact, modify tools/testing/selftests/powerpc/stringloops/memcmp.c with following: ------- #include #include "utils.h" -#define SIZE 256 -#define ITERATIONS 10000 +#define SIZE 8 +#define ITERATIONS 1000000 int test_memcmp(const void *s1, const void *s2, size_t n); ------- - Without patch 1.845642503 seconds time elapsed ( +- 0.12% ) - With patch 1.849767135 seconds time elapsed ( +- 0.26% ) -> They are nearly the same. (-0.2%) Signed-off-by: Simon Guo Signed-off-by: Michael Ellerman --- arch/powerpc/lib/memcmp_64.S | 140 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 133 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S index d75d18b7bd55..5776f91da29e 100644 --- a/arch/powerpc/lib/memcmp_64.S +++ b/arch/powerpc/lib/memcmp_64.S @@ -24,28 +24,41 @@ #define rH r31 #ifdef __LITTLE_ENDIAN__ +#define LH lhbrx +#define LW lwbrx #define LD ldbrx #else +#define LH lhzx +#define LW lwzx #define LD ldx #endif +/* + * There are 2 categories for memcmp: + * 1) src/dst has the same offset to the 8 bytes boundary. The handlers + * are named like .Lsameoffset_xxxx + * 2) src/dst has different offset to the 8 bytes boundary. The handlers + * are named like .Ldiffoffset_xxxx + */ _GLOBAL(memcmp) cmpdi cr1,r5,0 - /* Use the short loop if both strings are not 8B aligned */ - or r6,r3,r4 + /* Use the short loop if the src/dst addresses are not + * with the same offset of 8 bytes align boundary. + */ + xor r6,r3,r4 andi. r6,r6,7 - /* Use the short loop if length is less than 32B */ - cmpdi cr6,r5,31 + /* Fall back to short loop if compare at aligned addrs + * with less than 8 bytes. + */ + cmpdi cr6,r5,7 beq cr1,.Lzero - bne .Lshort - bgt cr6,.Llong + bgt cr6,.Lno_short .Lshort: mtctr r5 - 1: lbz rA,0(r3) lbz rB,0(r4) subf. rC,rB,rA @@ -78,11 +91,89 @@ _GLOBAL(memcmp) li r3,0 blr +.Lno_short: + dcbt 0,r3 + dcbt 0,r4 + bne .Ldiffoffset_8bytes_make_align_start + + +.Lsameoffset_8bytes_make_align_start: + /* attempt to compare bytes not aligned with 8 bytes so that + * rest comparison can run based on 8 bytes alignment. + */ + andi. r6,r3,7 + + /* Try to compare the first double word which is not 8 bytes aligned: + * load the first double word at (src & ~7UL) and shift left appropriate + * bits before comparision. + */ + rlwinm r6,r3,3,26,28 + beq .Lsameoffset_8bytes_aligned + clrrdi r3,r3,3 + clrrdi r4,r4,3 + LD rA,0,r3 + LD rB,0,r4 + sld rA,rA,r6 + sld rB,rB,r6 + cmpld cr0,rA,rB + srwi r6,r6,3 + bne cr0,.LcmpAB_lightweight + subfic r6,r6,8 + subf. r5,r6,r5 + addi r3,r3,8 + addi r4,r4,8 + beq .Lzero + +.Lsameoffset_8bytes_aligned: + /* now we are aligned with 8 bytes. + * Use .Llong loop if left cmp bytes are equal or greater than 32B. + */ + cmpdi cr6,r5,31 + bgt cr6,.Llong + +.Lcmp_lt32bytes: + /* compare 1 ~ 32 bytes, at least r3 addr is 8 bytes aligned now */ + cmpdi cr5,r5,7 + srdi r0,r5,3 + ble cr5,.Lcmp_rest_lt8bytes + + /* handle 8 ~ 31 bytes */ + clrldi r5,r5,61 + mtctr r0 +2: + LD rA,0,r3 + LD rB,0,r4 + cmpld cr0,rA,rB + addi r3,r3,8 + addi r4,r4,8 + bne cr0,.LcmpAB_lightweight + bdnz 2b + + cmpwi r5,0 + beq .Lzero + +.Lcmp_rest_lt8bytes: + /* Here we have only less than 8 bytes to compare with. at least s1 + * Address is aligned with 8 bytes. + * The next double words are load and shift right with appropriate + * bits. + */ + subfic r6,r5,8 + slwi r6,r6,3 + LD rA,0,r3 + LD rB,0,r4 + srd rA,rA,r6 + srd rB,rB,r6 + cmpld cr0,rA,rB + bne cr0,.LcmpAB_lightweight + b .Lzero + .Lnon_zero: mr r3,rC blr .Llong: + /* At least s1 addr is aligned with 8 bytes */ li off8,8 li off16,16 li off24,24 @@ -232,4 +323,39 @@ _GLOBAL(memcmp) ld r28,-32(r1) ld r27,-40(r1) blr + +.LcmpAB_lightweight: /* skip NV GPRS restore */ + li r3,1 + bgtlr + li r3,-1 + blr + +.Ldiffoffset_8bytes_make_align_start: + /* now try to align s1 with 8 bytes */ + rlwinm r6,r3,3,26,28 + beq .Ldiffoffset_align_s1_8bytes + + clrrdi r3,r3,3 + LD rA,0,r3 + LD rB,0,r4 /* unaligned load */ + sld rA,rA,r6 + srd rA,rA,r6 + srd rB,rB,r6 + cmpld cr0,rA,rB + srwi r6,r6,3 + bne cr0,.LcmpAB_lightweight + + subfic r6,r6,8 + subf. r5,r6,r5 + addi r3,r3,8 + add r4,r4,r6 + + beq .Lzero + +.Ldiffoffset_align_s1_8bytes: + /* now s1 is aligned with 8 bytes. */ + cmpdi cr5,r5,31 + ble cr5,.Lcmp_lt32bytes + b .Llong + EXPORT_SYMBOL(memcmp) -- cgit From f1ecbaf466be5a7a5c666b41eede6991caff8646 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Thu, 7 Jun 2018 09:57:52 +0800 Subject: powerpc: add vcmpequd/vcmpequb ppc instruction macro Some old tool chains don't know about instructions like vcmpequd. This patch adds .long macro for vcmpequd and vcmpequb, which is a preparation to optimize ppc64 memcmp with VMX instructions. Signed-off-by: Simon Guo Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 4436887bc415..c103caf99897 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -367,6 +367,8 @@ #define PPC_INST_STFDX 0x7c0005ae #define PPC_INST_LVX 0x7c0000ce #define PPC_INST_STVX 0x7c0001ce +#define PPC_INST_VCMPEQUD 0x100000c7 +#define PPC_INST_VCMPEQUB 0x10000006 /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) @@ -397,6 +399,7 @@ #define __PPC_BI(s) (((s) & 0x1f) << 16) #define __PPC_CT(t) (((t) & 0x0f) << 21) #define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11)) +#define __PPC_RC21 (0x1 << 10) /* * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a @@ -568,4 +571,12 @@ ((IH & 0x7) << 21)) #define PPC_INVALIDATE_ERAT PPC_SLBIA(7) +#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD | \ + ___PPC_RT(vrt) | ___PPC_RA(vra) | \ + ___PPC_RB(vrb) | __PPC_RC21) + +#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUB | \ + ___PPC_RT(vrt) | ___PPC_RA(vra) | \ + ___PPC_RB(vrb) | __PPC_RC21) + #endif /* _ASM_POWERPC_PPC_OPCODE_H */ -- cgit From d58badfb7cf1792ab4f1d0cd7896d733b85d650f Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Thu, 7 Jun 2018 09:57:53 +0800 Subject: powerpc/64: enhance memcmp() with VMX instruction for long bytes comparision This patch add VMX primitives to do memcmp() in case the compare size is equal or greater than 4K bytes. KSM feature can benefit from this. Test result with following test program(replace the "^>" with ""): ------ ># cat tools/testing/selftests/powerpc/stringloops/memcmp.c >#include >#include >#include >#include >#include "utils.h" >#define SIZE (1024 * 1024 * 900) >#define ITERATIONS 40 int test_memcmp(const void *s1, const void *s2, size_t n); static int testcase(void) { char *s1; char *s2; unsigned long i; s1 = memalign(128, SIZE); if (!s1) { perror("memalign"); exit(1); } s2 = memalign(128, SIZE); if (!s2) { perror("memalign"); exit(1); } for (i = 0; i < SIZE; i++) { s1[i] = i & 0xff; s2[i] = i & 0xff; } for (i = 0; i < ITERATIONS; i++) { int ret = test_memcmp(s1, s2, SIZE); if (ret) { printf("return %d at[%ld]! should have returned zero\n", ret, i); abort(); } } return 0; } int main(void) { return test_harness(testcase, "memcmp"); } ------ Without this patch (but with the first patch "powerpc/64: Align bytes before fall back to .Lshort in powerpc64 memcmp()." in the series): 4.726728762 seconds time elapsed ( +- 3.54%) With VMX patch: 4.234335473 seconds time elapsed ( +- 2.63%) There is ~+10% improvement. Testing with unaligned and different offset version (make s1 and s2 shift random offset within 16 bytes) can archieve higher improvement than 10%.. Signed-off-by: Simon Guo Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 4 +- arch/powerpc/lib/copypage_power7.S | 4 +- arch/powerpc/lib/memcmp_64.S | 241 +++++++++++++++++++++++++++++- arch/powerpc/lib/memcpy_power7.S | 6 +- arch/powerpc/lib/vmx-helper.c | 4 +- 5 files changed, 248 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 7841b8a60657..769567b66c0c 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -48,8 +48,8 @@ void __trace_opal_exit(long opcode, unsigned long retval); /* VMX copying */ int enter_vmx_usercopy(void); int exit_vmx_usercopy(void); -int enter_vmx_copy(void); -void * exit_vmx_copy(void *dest); +int enter_vmx_ops(void); +void *exit_vmx_ops(void *dest); /* Traps */ long machine_check_early(struct pt_regs *regs); diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index 8fa73b7ab20e..e38f956f7d9f 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -57,7 +57,7 @@ _GLOBAL(copypage_power7) std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl enter_vmx_copy + bl enter_vmx_ops cmpwi r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STK_REG(R31)(r1) @@ -100,7 +100,7 @@ _GLOBAL(copypage_power7) addi r3,r3,128 bdnz 1b - b exit_vmx_copy /* tail call optimise */ + b exit_vmx_ops /* tail call optimise */ #else li r0,(PAGE_SIZE/128) diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S index 5776f91da29e..be2f7925926b 100644 --- a/arch/powerpc/lib/memcmp_64.S +++ b/arch/powerpc/lib/memcmp_64.S @@ -9,6 +9,7 @@ */ #include #include +#include #define off8 r6 #define off16 r7 @@ -27,12 +28,73 @@ #define LH lhbrx #define LW lwbrx #define LD ldbrx +#define LVS lvsr +#define VPERM(_VRT,_VRA,_VRB,_VRC) \ + vperm _VRT,_VRB,_VRA,_VRC #else #define LH lhzx #define LW lwzx #define LD ldx +#define LVS lvsl +#define VPERM(_VRT,_VRA,_VRB,_VRC) \ + vperm _VRT,_VRA,_VRB,_VRC #endif +#define VMX_THRESH 4096 +#define ENTER_VMX_OPS \ + mflr r0; \ + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \ + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \ + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \ + std r0,16(r1); \ + stdu r1,-STACKFRAMESIZE(r1); \ + bl enter_vmx_ops; \ + cmpwi cr1,r3,0; \ + ld r0,STACKFRAMESIZE+16(r1); \ + ld r3,STK_REG(R31)(r1); \ + ld r4,STK_REG(R30)(r1); \ + ld r5,STK_REG(R29)(r1); \ + addi r1,r1,STACKFRAMESIZE; \ + mtlr r0 + +#define EXIT_VMX_OPS \ + mflr r0; \ + std r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \ + std r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \ + std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \ + std r0,16(r1); \ + stdu r1,-STACKFRAMESIZE(r1); \ + bl exit_vmx_ops; \ + ld r0,STACKFRAMESIZE+16(r1); \ + ld r3,STK_REG(R31)(r1); \ + ld r4,STK_REG(R30)(r1); \ + ld r5,STK_REG(R29)(r1); \ + addi r1,r1,STACKFRAMESIZE; \ + mtlr r0 + +/* + * LD_VSR_CROSS16B load the 2nd 16 bytes for _vaddr which is unaligned with + * 16 bytes boundary and permute the result with the 1st 16 bytes. + + * | y y y y y y y y y y y y y 0 1 2 | 3 4 5 6 7 8 9 a b c d e f z z z | + * ^ ^ ^ + * 0xbbbb10 0xbbbb20 0xbbb30 + * ^ + * _vaddr + * + * + * _vmask is the mask generated by LVS + * _v1st_qw is the 1st aligned QW of current addr which is already loaded. + * for example: 0xyyyyyyyyyyyyy012 for big endian + * _v2nd_qw is the 2nd aligned QW of cur _vaddr to be loaded. + * for example: 0x3456789abcdefzzz for big endian + * The permute result is saved in _v_res. + * for example: 0x0123456789abcdef for big endian. + */ +#define LD_VSR_CROSS16B(_vaddr,_vmask,_v1st_qw,_v2nd_qw,_v_res) \ + lvx _v2nd_qw,_vaddr,off16; \ + VPERM(_v_res,_v1st_qw,_v2nd_qw,_vmask) + /* * There are 2 categories for memcmp: * 1) src/dst has the same offset to the 8 bytes boundary. The handlers @@ -40,7 +102,7 @@ * 2) src/dst has different offset to the 8 bytes boundary. The handlers * are named like .Ldiffoffset_xxxx */ -_GLOBAL(memcmp) +_GLOBAL_TOC(memcmp) cmpdi cr1,r5,0 /* Use the short loop if the src/dst addresses are not @@ -132,7 +194,7 @@ _GLOBAL(memcmp) bgt cr6,.Llong .Lcmp_lt32bytes: - /* compare 1 ~ 32 bytes, at least r3 addr is 8 bytes aligned now */ + /* compare 1 ~ 31 bytes, at least r3 addr is 8 bytes aligned now */ cmpdi cr5,r5,7 srdi r0,r5,3 ble cr5,.Lcmp_rest_lt8bytes @@ -173,6 +235,15 @@ _GLOBAL(memcmp) blr .Llong: +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + /* Try to use vmx loop if length is equal or greater than 4K */ + cmpldi cr6,r5,VMX_THRESH + bge cr6,.Lsameoffset_vmx_cmp +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +.Llong_novmx_cmp: +#endif /* At least s1 addr is aligned with 8 bytes */ li off8,8 li off16,16 @@ -330,7 +401,97 @@ _GLOBAL(memcmp) li r3,-1 blr +#ifdef CONFIG_ALTIVEC +.Lsameoffset_vmx_cmp: + /* Enter with src/dst addrs has the same offset with 8 bytes + * align boundary + */ + ENTER_VMX_OPS + beq cr1,.Llong_novmx_cmp + +3: + /* need to check whether r4 has the same offset with r3 + * for 16 bytes boundary. + */ + xor r0,r3,r4 + andi. r0,r0,0xf + bne .Ldiffoffset_vmx_cmp_start + + /* len is no less than 4KB. Need to align with 16 bytes further. + */ + andi. rA,r3,8 + LD rA,0,r3 + beq 4f + LD rB,0,r4 + cmpld cr0,rA,rB + addi r3,r3,8 + addi r4,r4,8 + addi r5,r5,-8 + + beq cr0,4f + /* save and restore cr0 */ + mfocrf r5,128 + EXIT_VMX_OPS + mtocrf 128,r5 + b .LcmpAB_lightweight + +4: + /* compare 32 bytes for each loop */ + srdi r0,r5,5 + mtctr r0 + clrldi r5,r5,59 + li off16,16 + +.balign 16 +5: + lvx v0,0,r3 + lvx v1,0,r4 + VCMPEQUD_RC(v0,v0,v1) + bnl cr6,7f + lvx v0,off16,r3 + lvx v1,off16,r4 + VCMPEQUD_RC(v0,v0,v1) + bnl cr6,6f + addi r3,r3,32 + addi r4,r4,32 + bdnz 5b + + EXIT_VMX_OPS + cmpdi r5,0 + beq .Lzero + b .Lcmp_lt32bytes + +6: + addi r3,r3,16 + addi r4,r4,16 + +7: + /* diff the last 16 bytes */ + EXIT_VMX_OPS + LD rA,0,r3 + LD rB,0,r4 + cmpld cr0,rA,rB + li off8,8 + bne cr0,.LcmpAB_lightweight + + LD rA,off8,r3 + LD rB,off8,r4 + cmpld cr0,rA,rB + bne cr0,.LcmpAB_lightweight + b .Lzero +#endif + .Ldiffoffset_8bytes_make_align_start: +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + /* only do vmx ops when the size equal or greater than 4K bytes */ + cmpdi cr5,r5,VMX_THRESH + bge cr5,.Ldiffoffset_vmx_cmp +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +.Ldiffoffset_novmx_cmp: +#endif + /* now try to align s1 with 8 bytes */ rlwinm r6,r3,3,26,28 beq .Ldiffoffset_align_s1_8bytes @@ -356,6 +517,82 @@ _GLOBAL(memcmp) /* now s1 is aligned with 8 bytes. */ cmpdi cr5,r5,31 ble cr5,.Lcmp_lt32bytes + +#ifdef CONFIG_ALTIVEC + b .Llong_novmx_cmp +#else b .Llong +#endif + +#ifdef CONFIG_ALTIVEC +.Ldiffoffset_vmx_cmp: + ENTER_VMX_OPS + beq cr1,.Ldiffoffset_novmx_cmp + +.Ldiffoffset_vmx_cmp_start: + /* Firstly try to align r3 with 16 bytes */ + andi. r6,r3,0xf + li off16,16 + beq .Ldiffoffset_vmx_s1_16bytes_align + LVS v3,0,r3 + LVS v4,0,r4 + + lvx v5,0,r3 + lvx v6,0,r4 + LD_VSR_CROSS16B(r3,v3,v5,v7,v9) + LD_VSR_CROSS16B(r4,v4,v6,v8,v10) + + VCMPEQUB_RC(v7,v9,v10) + bnl cr6,.Ldiffoffset_vmx_diff_found + + subfic r6,r6,16 + subf r5,r6,r5 + add r3,r3,r6 + add r4,r4,r6 + +.Ldiffoffset_vmx_s1_16bytes_align: + /* now s1 is aligned with 16 bytes */ + lvx v6,0,r4 + LVS v4,0,r4 + srdi r6,r5,5 /* loop for 32 bytes each */ + clrldi r5,r5,59 + mtctr r6 + +.balign 16 +.Ldiffoffset_vmx_32bytesloop: + /* the first qw of r4 was saved in v6 */ + lvx v9,0,r3 + LD_VSR_CROSS16B(r4,v4,v6,v8,v10) + VCMPEQUB_RC(v7,v9,v10) + vor v6,v8,v8 + bnl cr6,.Ldiffoffset_vmx_diff_found + + addi r3,r3,16 + addi r4,r4,16 + + lvx v9,0,r3 + LD_VSR_CROSS16B(r4,v4,v6,v8,v10) + VCMPEQUB_RC(v7,v9,v10) + vor v6,v8,v8 + bnl cr6,.Ldiffoffset_vmx_diff_found + + addi r3,r3,16 + addi r4,r4,16 + + bdnz .Ldiffoffset_vmx_32bytesloop + + EXIT_VMX_OPS + + cmpdi r5,0 + beq .Lzero + b .Lcmp_lt32bytes + +.Ldiffoffset_vmx_diff_found: + EXIT_VMX_OPS + /* anyway, the diff will appear in next 16 bytes */ + li r5,16 + b .Lcmp_lt32bytes + +#endif EXPORT_SYMBOL(memcmp) diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index df7de9d3da08..070cdf6f584f 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -230,7 +230,7 @@ _GLOBAL(memcpy_power7) std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl enter_vmx_copy + bl enter_vmx_ops cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STK_REG(R31)(r1) @@ -445,7 +445,7 @@ _GLOBAL(memcpy_power7) 15: addi r1,r1,STACKFRAMESIZE ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - b exit_vmx_copy /* tail call optimise */ + b exit_vmx_ops /* tail call optimise */ .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -649,5 +649,5 @@ _GLOBAL(memcpy_power7) 15: addi r1,r1,STACKFRAMESIZE ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - b exit_vmx_copy /* tail call optimise */ + b exit_vmx_ops /* tail call optimise */ #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index bf925cdcaca9..9f340494a8ac 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -53,7 +53,7 @@ int exit_vmx_usercopy(void) return 0; } -int enter_vmx_copy(void) +int enter_vmx_ops(void) { if (in_interrupt()) return 0; @@ -70,7 +70,7 @@ int enter_vmx_copy(void) * passed a pointer to the destination which we return as required by a * memcpy implementation. */ -void *exit_vmx_copy(void *dest) +void *exit_vmx_ops(void *dest) { disable_kernel_altivec(); preempt_enable(); -- cgit From c2a4e54e8b6a89dde574608c47e460a0371e44be Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Thu, 7 Jun 2018 09:57:54 +0800 Subject: powerpc/64: add 32 bytes prechecking before using VMX optimization on memcmp() This patch is based on the previous VMX patch on memcmp(). To optimize ppc64 memcmp() with VMX instruction, we need to think about the VMX penalty brought with: If kernel uses VMX instruction, it needs to save/restore current thread's VMX registers. There are 32 x 128 bits VMX registers in PPC, which means 32 x 16 = 512 bytes for load and store. The major concern regarding the memcmp() performance in kernel is KSM, who will use memcmp() frequently to merge identical pages. So it will make sense to take some measures/enhancement on KSM to see whether any improvement can be done here. Cyril Bur indicates that the memcmp() for KSM has a higher possibility to fail (unmatch) early in previous bytes in following mail. https://patchwork.ozlabs.org/patch/817322/#1773629 And I am taking a follow-up on this with this patch. Per some testing, it shows KSM memcmp() will fail early at previous 32 bytes. More specifically: - 76% cases will fail/unmatch before 16 bytes; - 83% cases will fail/unmatch before 32 bytes; - 84% cases will fail/unmatch before 64 bytes; So 32 bytes looks a better choice than other bytes for pre-checking. The early failure is also true for memcmp() for non-KSM case. With a non-typical call load, it shows ~73% cases fail before first 32 bytes. This patch adds a 32 bytes pre-checking firstly before jumping into VMX operations, to avoid the unnecessary VMX penalty. It is not limited to KSM case. And the testing shows ~20% improvement on memcmp() average execution time with this patch. And note the 32B pre-checking is only performed when the compare size is long enough (>=4K currently) to allow VMX operation. The detail data and analysis is at: https://github.com/justdoitqd/publicFiles/blob/master/memcmp/README.md Signed-off-by: Simon Guo Signed-off-by: Michael Ellerman --- arch/powerpc/lib/memcmp_64.S | 57 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S index be2f7925926b..844d8e774492 100644 --- a/arch/powerpc/lib/memcmp_64.S +++ b/arch/powerpc/lib/memcmp_64.S @@ -404,8 +404,27 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) #ifdef CONFIG_ALTIVEC .Lsameoffset_vmx_cmp: /* Enter with src/dst addrs has the same offset with 8 bytes - * align boundary + * align boundary. + * + * There is an optimization based on following fact: memcmp() + * prones to fail early at the first 32 bytes. + * Before applying VMX instructions which will lead to 32x128bits + * VMX regs load/restore penalty, we compare the first 32 bytes + * so that we can catch the ~80% fail cases. */ + + li r0,4 + mtctr r0 +.Lsameoffset_prechk_32B_loop: + LD rA,0,r3 + LD rB,0,r4 + cmpld cr0,rA,rB + addi r3,r3,8 + addi r4,r4,8 + bne cr0,.LcmpAB_lightweight + addi r5,r5,-8 + bdnz .Lsameoffset_prechk_32B_loop + ENTER_VMX_OPS beq cr1,.Llong_novmx_cmp @@ -482,16 +501,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) #endif .Ldiffoffset_8bytes_make_align_start: -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - /* only do vmx ops when the size equal or greater than 4K bytes */ - cmpdi cr5,r5,VMX_THRESH - bge cr5,.Ldiffoffset_vmx_cmp -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) - -.Ldiffoffset_novmx_cmp: -#endif - /* now try to align s1 with 8 bytes */ rlwinm r6,r3,3,26,28 beq .Ldiffoffset_align_s1_8bytes @@ -515,6 +524,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) .Ldiffoffset_align_s1_8bytes: /* now s1 is aligned with 8 bytes. */ +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + /* only do vmx ops when the size equal or greater than 4K bytes */ + cmpdi cr5,r5,VMX_THRESH + bge cr5,.Ldiffoffset_vmx_cmp +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +.Ldiffoffset_novmx_cmp: +#endif + + cmpdi cr5,r5,31 ble cr5,.Lcmp_lt32bytes @@ -526,6 +546,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) #ifdef CONFIG_ALTIVEC .Ldiffoffset_vmx_cmp: + /* perform a 32 bytes pre-checking before + * enable VMX operations. + */ + li r0,4 + mtctr r0 +.Ldiffoffset_prechk_32B_loop: + LD rA,0,r3 + LD rB,0,r4 + cmpld cr0,rA,rB + addi r3,r3,8 + addi r4,r4,8 + bne cr0,.LcmpAB_lightweight + addi r5,r5,-8 + bdnz .Ldiffoffset_prechk_32B_loop + ENTER_VMX_OPS beq cr1,.Ldiffoffset_novmx_cmp -- cgit From c827ac450d379f113b92663cbe5562643635c6b9 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Thu, 7 Jun 2018 09:57:55 +0800 Subject: selftests/powerpc: Update memcmp_64 selftest for VMX implementation This patch reworked selftest memcmp_64 so that memcmp selftest can cover more test cases. It adds testcases for: - memcmp over 4K bytes size. - s1/s2 with different/random offset on 16 bytes boundary. - enter/exit_vmx_ops pairness. Signed-off-by: Simon Guo [mpe: Add -maltivec to fix build on some toolchains] Signed-off-by: Michael Ellerman --- .../selftests/powerpc/copyloops/asm/ppc_asm.h | 4 +- .../testing/selftests/powerpc/stringloops/Makefile | 2 +- .../selftests/powerpc/stringloops/asm/ppc-opcode.h | 39 +++++++++ .../selftests/powerpc/stringloops/asm/ppc_asm.h | 25 ++++++ .../testing/selftests/powerpc/stringloops/memcmp.c | 98 +++++++++++++++++----- 5 files changed, 143 insertions(+), 25 deletions(-) create mode 100644 tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index 5ffe04d802c9..dfce161aebfc 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -36,11 +36,11 @@ FUNC_START(exit_vmx_usercopy) li r3,0 blr -FUNC_START(enter_vmx_copy) +FUNC_START(enter_vmx_ops) li r3,1 blr -FUNC_START(exit_vmx_copy) +FUNC_START(exit_vmx_ops) blr FUNC_START(memcpy_power7) diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 1125e489055e..c60c6172dd3c 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # The loops are all 64-bit code -CFLAGS += -m64 +CFLAGS += -m64 -maltivec CFLAGS += -I$(CURDIR) TEST_GEN_PROGS := memcmp diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h new file mode 100644 index 000000000000..9de413c0c2cb --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h @@ -0,0 +1,39 @@ +/* + * Copyright 2009 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * provides masks and opcode images for use by code generation, emulation + * and for instructions that older assemblers might not know about + */ +#ifndef _ASM_POWERPC_PPC_OPCODE_H +#define _ASM_POWERPC_PPC_OPCODE_H + + +# define stringify_in_c(...) __VA_ARGS__ +# define ASM_CONST(x) x + + +#define PPC_INST_VCMPEQUD_RC 0x100000c7 +#define PPC_INST_VCMPEQUB_RC 0x10000006 + +#define __PPC_RC21 (0x1 << 10) + +/* macros to insert fields into opcodes */ +#define ___PPC_RA(a) (((a) & 0x1f) << 16) +#define ___PPC_RB(b) (((b) & 0x1f) << 11) +#define ___PPC_RS(s) (((s) & 0x1f) << 21) +#define ___PPC_RT(t) ___PPC_RS(t) + +#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD_RC | \ + ___PPC_RT(vrt) | ___PPC_RA(vra) | \ + ___PPC_RB(vrb) | __PPC_RC21) + +#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUB_RC | \ + ___PPC_RT(vrt) | ___PPC_RA(vra) | \ + ___PPC_RB(vrb) | __PPC_RC21) + +#endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h index 136242ec4b0e..d2c0a911f55e 100644 --- a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PPC_ASM_H +#define __PPC_ASM_H #include #ifndef r1 @@ -6,3 +8,26 @@ #endif #define _GLOBAL(A) FUNC_START(test_ ## A) +#define _GLOBAL_TOC(A) FUNC_START(test_ ## A) + +#define CONFIG_ALTIVEC + +#define R14 r14 +#define R15 r15 +#define R16 r16 +#define R17 r17 +#define R18 r18 +#define R19 r19 +#define R20 r20 +#define R21 r21 +#define R22 r22 +#define R29 r29 +#define R30 r30 +#define R31 r31 + +#define STACKFRAMESIZE 256 +#define STK_REG(i) (112 + ((i)-14)*8) + +#define BEGIN_FTR_SECTION +#define END_FTR_SECTION_IFSET(val) +#endif diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c index 8250db25b379..b5cf71727b2d 100644 --- a/tools/testing/selftests/powerpc/stringloops/memcmp.c +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -2,20 +2,40 @@ #include #include #include +#include #include "utils.h" #define SIZE 256 #define ITERATIONS 10000 +#define LARGE_SIZE (5 * 1024) +#define LARGE_ITERATIONS 1000 +#define LARGE_MAX_OFFSET 32 +#define LARGE_SIZE_START 4096 + +#define MAX_OFFSET_DIFF_S1_S2 48 + +int vmx_count; +int enter_vmx_ops(void) +{ + vmx_count++; + return 1; +} + +void exit_vmx_ops(void) +{ + vmx_count--; +} int test_memcmp(const void *s1, const void *s2, size_t n); /* test all offsets and lengths */ -static void test_one(char *s1, char *s2) +static void test_one(char *s1, char *s2, unsigned long max_offset, + unsigned long size_start, unsigned long max_size) { unsigned long offset, size; - for (offset = 0; offset < SIZE; offset++) { - for (size = 0; size < (SIZE-offset); size++) { + for (offset = 0; offset < max_offset; offset++) { + for (size = size_start; size < (max_size - offset); size++) { int x, y; unsigned long i; @@ -35,70 +55,104 @@ static void test_one(char *s1, char *s2) printf("\n"); abort(); } + + if (vmx_count != 0) { + printf("vmx enter/exit not paired.(offset:%ld size:%ld s1:%p s2:%p vc:%d\n", + offset, size, s1, s2, vmx_count); + printf("\n"); + abort(); + } } } } -static int testcase(void) +static int testcase(bool islarge) { char *s1; char *s2; unsigned long i; - s1 = memalign(128, SIZE); + unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE); + unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2; + int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS; + + s1 = memalign(128, alloc_size); if (!s1) { perror("memalign"); exit(1); } - s2 = memalign(128, SIZE); + s2 = memalign(128, alloc_size); if (!s2) { perror("memalign"); exit(1); } - srandom(1); + srandom(time(0)); - for (i = 0; i < ITERATIONS; i++) { + for (i = 0; i < iterations; i++) { unsigned long j; unsigned long change; + char *rand_s1 = s1; + char *rand_s2 = s2; - for (j = 0; j < SIZE; j++) + for (j = 0; j < alloc_size; j++) s1[j] = random(); - memcpy(s2, s1, SIZE); + rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2; + rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2; + memcpy(rand_s2, rand_s1, comp_size); /* change one byte */ - change = random() % SIZE; - s2[change] = random() & 0xff; - - test_one(s1, s2); + change = random() % comp_size; + rand_s2[change] = random() & 0xff; + + if (islarge) + test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET, + LARGE_SIZE_START, comp_size); + else + test_one(rand_s1, rand_s2, SIZE, 0, comp_size); } - srandom(1); + srandom(time(0)); - for (i = 0; i < ITERATIONS; i++) { + for (i = 0; i < iterations; i++) { unsigned long j; unsigned long change; + char *rand_s1 = s1; + char *rand_s2 = s2; - for (j = 0; j < SIZE; j++) + for (j = 0; j < alloc_size; j++) s1[j] = random(); - memcpy(s2, s1, SIZE); + rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2; + rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2; + memcpy(rand_s2, rand_s1, comp_size); /* change multiple bytes, 1/8 of total */ - for (j = 0; j < SIZE / 8; j++) { - change = random() % SIZE; + for (j = 0; j < comp_size / 8; j++) { + change = random() % comp_size; s2[change] = random() & 0xff; } - test_one(s1, s2); + if (islarge) + test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET, + LARGE_SIZE_START, comp_size); + else + test_one(rand_s1, rand_s2, SIZE, 0, comp_size); } return 0; } +static int testcases(void) +{ + testcase(0); + testcase(1); + return 0; +} + int main(void) { - return test_harness(testcase, "memcmp"); + return test_harness(testcases, "memcmp"); } -- cgit From a596a7e91710d26fd862e3b7031c4012974583f2 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 5 Feb 2018 16:17:16 +1100 Subject: powerpc/tm: Update function prototype comment In commit eb5c3f1c8647 ("powerpc: Always save/restore checkpointed regs during treclaim/trecheckpoint") __tm_recheckpoint was modified to no longer take the second parameter 'unsigned long orig_msr' as part of a TM rewrite to simplify the reclaiming/recheckpointing process. There is a comment in the asm file where the function is delcared which has an incorrect prototype with the 'orig_msr' parameter. This patch corrects the comment. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/tm.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index ff12f47a96b6..6bb6f5123dcf 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -312,8 +312,8 @@ _GLOBAL(tm_reclaim) blr - /* void __tm_recheckpoint(struct thread_struct *thread, - * unsigned long orig_msr) + /* + * void __tm_recheckpoint(struct thread_struct *thread) * - Restore the checkpointed register state saved by tm_reclaim * when we switch_to a process. * -- cgit From edd00b830731be468fd3caf7f9154d13228f4a93 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Thu, 1 Feb 2018 12:07:46 +1100 Subject: powerpc/tm: Remove struct thread_info param from tm_reclaim_thread() Since commit dc3106690b20 ("powerpc: tm: Always use fp_state and vr_state to store live registers") tm_reclaim_thread() doesn't use the parameter anymore, both callers have to bother getting it as they have no need for a struct thread_info either. Just remove it and adjust the callers. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index eca92aac2e17..e9533b4d2f08 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -867,8 +867,7 @@ static inline bool tm_enabled(struct task_struct *tsk) return tsk && tsk->thread.regs && (tsk->thread.regs->msr & MSR_TM); } -static void tm_reclaim_thread(struct thread_struct *thr, - struct thread_info *ti, uint8_t cause) +static void tm_reclaim_thread(struct thread_struct *thr, uint8_t cause) { /* * Use the current MSR TM suspended bit to track if we have @@ -915,7 +914,7 @@ static void tm_reclaim_thread(struct thread_struct *thr, void tm_reclaim_current(uint8_t cause) { tm_enable(); - tm_reclaim_thread(¤t->thread, current_thread_info(), cause); + tm_reclaim_thread(¤t->thread, cause); } static inline void tm_reclaim_task(struct task_struct *tsk) @@ -946,7 +945,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) thr->regs->ccr, thr->regs->msr, thr->regs->trap); - tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED); + tm_reclaim_thread(thr, TM_CAUSE_RESCHED); TM_DEBUG("--- tm_reclaim on pid %d complete\n", tsk->pid); -- cgit From a5bbe8fd29f7e42fe5d26371adbad97c76884355 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Mon, 20 Feb 2017 18:52:10 +0530 Subject: powerpc/powernv/opal-dump : Handles opal_dump_info properly Moves the return value check of 'opal_dump_info' to a proper place which was previously unnecessarily filling all the dump info even on failure. Signed-off-by: Mukesh Ojha Acked-by: Stewart Smith Acked-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-dump.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 0dc8fa4e0af2..a792966ab0f2 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -225,13 +225,16 @@ static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t * if (rc == OPAL_PARAMETER) rc = opal_dump_info(&id, &size); + if (rc) { + pr_warn("%s: Failed to get dump info (%d)\n", + __func__, rc); + return rc; + } + *dump_id = be32_to_cpu(id); *dump_size = be32_to_cpu(size); *dump_type = be32_to_cpu(type); - if (rc) - pr_warn("%s: Failed to get dump info (%d)\n", - __func__, rc); return rc; } -- cgit From b29336c0e1785a28bc40a9fd47c2321671e9792e Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Mon, 20 Feb 2017 18:52:11 +0530 Subject: powerpc/powernv/opal-dump : Use IRQ_HANDLED instead of numbers in interrupt handler Fixes: 8034f715f ("powernv/opal-dump: Convert to irq domain") Converts all the return explicit number to a more proper IRQ_HANDLED, which looks proper incase of interrupt handler returning case. Here, It also removes error message like "nobody cared" which was getting unveiled while returning -1 or 0 from handler. Signed-off-by: Mukesh Ojha Reviewed-by: Vasant Hegde Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-dump.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index a792966ab0f2..198143833f00 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -371,13 +371,12 @@ static irqreturn_t process_dump(int irq, void *data) { int rc; uint32_t dump_id, dump_size, dump_type; - struct dump_obj *dump; char name[22]; struct kobject *kobj; rc = dump_read_info(&dump_id, &dump_size, &dump_type); if (rc != OPAL_SUCCESS) - return rc; + return IRQ_HANDLED; sprintf(name, "0x%x-0x%x", dump_type, dump_id); @@ -389,12 +388,10 @@ static irqreturn_t process_dump(int irq, void *data) if (kobj) { /* Drop reference added by kset_find_obj() */ kobject_put(kobj); - return 0; + return IRQ_HANDLED; } - dump = create_dump_obj(dump_id, dump_size, dump_type); - if (!dump) - return -1; + create_dump_obj(dump_id, dump_size, dump_type); return IRQ_HANDLED; } -- cgit From bd90284cc6c1c9e8e48c8eadd0c79574fcce0b81 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:44 +1000 Subject: powerpc/powernv: opal_put_chars partial write fix The intention here is to consume and discard the remaining buffer upon error. This works if there has not been a previous partial write. If there has been, then total_len is no longer total number of bytes to copy. total_len is always "bytes left to copy", so it should be added to written bytes. This code may not be exercised any more if partial writes will not be hit, but this is a small bugfix before a larger change. Reviewed-by: Benjamin Herrenschmidt Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 0d539c661748..371e33ecc547 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -388,7 +388,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) /* Closed or other error drop */ if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) { - written = total_len; + written += total_len; break; } if (rc == OPAL_SUCCESS) { -- cgit From 36d2dabc8762f1f4118f80d85049958390f061a8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:45 +1000 Subject: powerpc/powernv: Fix OPAL console driver OPAL_BUSY loops The OPAL console driver does not delay in case it gets OPAL_BUSY or OPAL_BUSY_EVENT from firmware. It can't yet be made to sleep because it is called under spinlock, but it can be changed to the standard OPAL_BUSY loop form, and a delay added to keep it from hitting the firmware too frequently. Cc: Benjamin Herrenschmidt Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 38 +++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 371e33ecc547..a9773a619081 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -378,33 +378,41 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) /* We still try to handle partial completions, though they * should no longer happen. */ - rc = OPAL_BUSY; - while(total_len > 0 && (rc == OPAL_BUSY || - rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) { + + while (total_len > 0) { olen = cpu_to_be64(total_len); - rc = opal_console_write(vtermno, &olen, data); + + rc = OPAL_BUSY; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_console_write(vtermno, &olen, data); + if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); + opal_poll_events(NULL); + } else if (rc == OPAL_BUSY) { + mdelay(OPAL_BUSY_DELAY_MS); + } + } + len = be64_to_cpu(olen); /* Closed or other error drop */ - if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && - rc != OPAL_BUSY_EVENT) { - written += total_len; + if (rc != OPAL_SUCCESS) { + written += total_len; /* drop remaining chars */ break; } - if (rc == OPAL_SUCCESS) { - total_len -= len; - data += len; - written += len; - } + + total_len -= len; + data += len; + written += len; + /* This is a bit nasty but we need that for the console to * flush when there aren't any interrupts. We will clean * things a bit later to limit that to synchronous path * such as the kernel console and xmon/udbg */ - do + do { opal_poll_events(&evt); - while(rc == OPAL_SUCCESS && - (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)); + } while (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT); } spin_unlock_irqrestore(&opal_write_lock, flags); return written; -- cgit From 3a80bfc7ea96d253be0e7d869c059b1976c19809 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:46 +1000 Subject: powerpc/powernv: opal-kmsg standardise OPAL_BUSY handling OPAL_CONSOLE_FLUSH is documented as being able to return OPAL_BUSY, so implement the standard OPAL_BUSY handling for it. Reviewed-by: Russell Currey Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-kmsg.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c index 6f1214d4de92..f8f41ccce75f 100644 --- a/arch/powerpc/platforms/powernv/opal-kmsg.c +++ b/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -12,6 +12,7 @@ */ #include +#include #include #include @@ -26,8 +27,7 @@ static void force_opal_console_flush(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { - int i; - int64_t ret; + s64 rc; /* * Outside of a panic context the pollers will continue to run, @@ -37,14 +37,22 @@ static void force_opal_console_flush(struct kmsg_dumper *dumper, return; if (opal_check_token(OPAL_CONSOLE_FLUSH)) { - ret = opal_console_flush(0); + do { + rc = OPAL_BUSY; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_console_flush(0); + if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); + opal_poll_events(NULL); + } else if (rc == OPAL_BUSY) { + mdelay(OPAL_BUSY_DELAY_MS); + } + } + } while (rc == OPAL_PARTIAL); /* More to flush */ - if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER) - return; - - /* Incrementally flush until there's nothing left */ - while (opal_console_flush(0) != OPAL_SUCCESS); } else { + int i; + /* * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, * the console can still be flushed by calling the polling -- cgit From e00da0f2db91b90e990cc05088f03adbc58af895 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:47 +1000 Subject: powerpc/powernv: opal-kmsg use flush fallback from console code Use the more refined and tested event polling loop from opal_put_chars as the fallback console flush in the opal-kmsg path. This loop is used by the console driver today, whereas the opal-kmsg fallback is not likely to have been used for years. Use WARN_ONCE rather than a printk when the fallback is invoked to prepare for moving the console flush into a common function. Reviewed-by: Russell Currey Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-kmsg.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c index f8f41ccce75f..fd2bbf4fd6dc 100644 --- a/arch/powerpc/platforms/powernv/opal-kmsg.c +++ b/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -51,20 +51,17 @@ static void force_opal_console_flush(struct kmsg_dumper *dumper, } while (rc == OPAL_PARTIAL); /* More to flush */ } else { - int i; + __be64 evt; + WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n"); /* * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, * the console can still be flushed by calling the polling - * function enough times to flush the buffer. We don't know - * how much output still needs to be flushed, but we can be - * generous since the kernel is in panic and doesn't need - * to do much else. + * function while it has OPAL_EVENT_CONSOLE_OUTPUT events. */ - printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n"); - for (i = 0; i < 1024; i++) { - opal_poll_events(NULL); - } + do { + opal_poll_events(&evt); + } while (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT); } } -- cgit From d2a2262e686ce7a27776add27751f925ceda856f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:48 +1000 Subject: powerpc/powernv: Implement and use opal_flush_console A new console flushing firmware API was introduced to replace event polling loops, and implemented in opal-kmsg with affddff69c55e ("powerpc/powernv: Add a kmsg_dumper that flushes console output on panic"), to flush the console in the panic path. The OPAL console driver has other situations where interrupts are off and it needs to flush the console synchronously. These still use a polling loop. So move the opal-kmsg flush code to opal_flush_console, and use the new function in opal-kmsg and opal_put_chars. Cc: Benjamin Herrenschmidt Reviewed-by: Russell Currey Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/platforms/powernv/opal-kmsg.c | 35 +++---------------------- arch/powerpc/platforms/powernv/opal.c | 42 +++++++++++++++++++++++++++--- 3 files changed, 42 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index e1b2910c6e81..33ab95a4ac0f 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -305,6 +305,7 @@ extern void opal_configure_cores(void); extern int opal_get_chars(uint32_t vtermno, char *buf, int count); extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); +extern int opal_flush_console(uint32_t vtermno); extern void hvc_opal_init_early(void); diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c index fd2bbf4fd6dc..55691950d981 100644 --- a/arch/powerpc/platforms/powernv/opal-kmsg.c +++ b/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -12,7 +12,6 @@ */ #include -#include #include #include @@ -24,11 +23,9 @@ * may not be completely printed. This function does not actually dump the * message, it just ensures that OPAL completely flushes the console buffer. */ -static void force_opal_console_flush(struct kmsg_dumper *dumper, +static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { - s64 rc; - /* * Outside of a panic context the pollers will continue to run, * so we don't need to do any special flushing. @@ -36,37 +33,11 @@ static void force_opal_console_flush(struct kmsg_dumper *dumper, if (reason != KMSG_DUMP_PANIC) return; - if (opal_check_token(OPAL_CONSOLE_FLUSH)) { - do { - rc = OPAL_BUSY; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_console_flush(0); - if (rc == OPAL_BUSY_EVENT) { - mdelay(OPAL_BUSY_DELAY_MS); - opal_poll_events(NULL); - } else if (rc == OPAL_BUSY) { - mdelay(OPAL_BUSY_DELAY_MS); - } - } - } while (rc == OPAL_PARTIAL); /* More to flush */ - - } else { - __be64 evt; - - WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n"); - /* - * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, - * the console can still be flushed by calling the polling - * function while it has OPAL_EVENT_CONSOLE_OUTPUT events. - */ - do { - opal_poll_events(&evt); - } while (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT); - } + opal_flush_console(0); } static struct kmsg_dumper opal_kmsg_dumper = { - .dump = force_opal_console_flush + .dump = kmsg_dump_opal_console_flush }; void __init opal_kmsg_init(void) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index a9773a619081..b007d3ff1445 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -350,7 +350,6 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) __be64 olen; s64 len, rc; unsigned long flags; - __be64 evt; if (!opal.entry) return -ENODEV; @@ -371,7 +370,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) /* Closed -> drop characters */ if (rc) return total_len; - opal_poll_events(NULL); + opal_flush_console(vtermno); return -EAGAIN; } @@ -410,12 +409,47 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) * things a bit later to limit that to synchronous path * such as the kernel console and xmon/udbg */ + opal_flush_console(vtermno); + } + spin_unlock_irqrestore(&opal_write_lock, flags); + + return written; +} + +int opal_flush_console(uint32_t vtermno) +{ + s64 rc; + + if (!opal_check_token(OPAL_CONSOLE_FLUSH)) { + __be64 evt; + + WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n"); + /* + * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, + * the console can still be flushed by calling the polling + * function while it has OPAL_EVENT_CONSOLE_OUTPUT events. + */ do { opal_poll_events(&evt); } while (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT); + + return OPAL_SUCCESS; } - spin_unlock_irqrestore(&opal_write_lock, flags); - return written; + + do { + rc = OPAL_BUSY; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_console_flush(vtermno); + if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); + opal_poll_events(NULL); + } else if (rc == OPAL_BUSY) { + mdelay(OPAL_BUSY_DELAY_MS); + } + } + } while (rc == OPAL_PARTIAL); /* More to flush */ + + return opal_error_code(rc); } static int opal_recover_mce(struct pt_regs *regs, -- cgit From b74d2807ae0cdb17ccc45d22260fc151a1b2d46b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:49 +1000 Subject: powerpc/powernv: Remove OPALv1 support from opal console driver opal_put_chars deals with partial writes because in OPALv1, opal_console_write_buffer_space did not work correctly. That firmware is not supported. This reworks the opal_put_chars code to no longer deal with partial writes by turning them into full writes. Partial write handling is still supported in terms of what gets returned to the caller, but it may not go to the console atomically. A warning message is printed in this case. This allows console flushing to be moved out of the opal_write_lock spinlock. That could cause the lock to be held for long periods if the console is busy (especially if it was being spammed by firmware), which is dangerous because the lock is taken by xmon to debug the system. Flushing outside the lock improves the situation a bit. Cc: Benjamin Herrenschmidt Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 86 ++++++++++++++++------------------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index b007d3ff1445..e18472757617 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -346,10 +346,10 @@ int opal_get_chars(uint32_t vtermno, char *buf, int count) int opal_put_chars(uint32_t vtermno, const char *data, int total_len) { - int written = 0; - __be64 olen; - s64 len, rc; unsigned long flags; + int written; + __be64 olen; + s64 rc; if (!opal.entry) return -ENODEV; @@ -357,62 +357,56 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) /* We want put_chars to be atomic to avoid mangling of hvsi * packets. To do that, we first test for room and return * -EAGAIN if there isn't enough. - * - * Unfortunately, opal_console_write_buffer_space() doesn't - * appear to work on opal v1, so we just assume there is - * enough room and be done with it */ spin_lock_irqsave(&opal_write_lock, flags); rc = opal_console_write_buffer_space(vtermno, &olen); - len = be64_to_cpu(olen); - if (rc || len < total_len) { - spin_unlock_irqrestore(&opal_write_lock, flags); + if (rc || be64_to_cpu(olen) < total_len) { /* Closed -> drop characters */ if (rc) - return total_len; - opal_flush_console(vtermno); - return -EAGAIN; + written = total_len; + else + written = -EAGAIN; + goto out; } - /* We still try to handle partial completions, though they - * should no longer happen. - */ - - while (total_len > 0) { - olen = cpu_to_be64(total_len); - - rc = OPAL_BUSY; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_console_write(vtermno, &olen, data); - if (rc == OPAL_BUSY_EVENT) { - mdelay(OPAL_BUSY_DELAY_MS); - opal_poll_events(NULL); - } else if (rc == OPAL_BUSY) { - mdelay(OPAL_BUSY_DELAY_MS); - } - } - - len = be64_to_cpu(olen); - - /* Closed or other error drop */ - if (rc != OPAL_SUCCESS) { - written += total_len; /* drop remaining chars */ - break; + /* Should not get a partial write here because space is available. */ + olen = cpu_to_be64(total_len); + rc = opal_console_write(vtermno, &olen, data); + if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); + opal_poll_events(NULL); + } else if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); } + written = -EAGAIN; + goto out; + } - total_len -= len; - data += len; - written += len; + /* Closed or other error drop */ + if (rc != OPAL_SUCCESS) { + written = opal_error_code(rc); + goto out; + } - /* This is a bit nasty but we need that for the console to - * flush when there aren't any interrupts. We will clean - * things a bit later to limit that to synchronous path - * such as the kernel console and xmon/udbg - */ - opal_flush_console(vtermno); + written = be64_to_cpu(olen); + if (written < total_len) { + /* Should not happen */ + pr_warn("atomic console write returned partial len=%d written=%d\n", total_len, written); + if (!written) + written = -EAGAIN; } + +out: spin_unlock_irqrestore(&opal_write_lock, flags); + /* This is a bit nasty but we need that for the console to + * flush when there aren't any interrupts. We will clean + * things a bit later to limit that to synchronous path + * such as the kernel console and xmon/udbg + */ + opal_flush_console(vtermno); + return written; } -- cgit From ac4ac788fdadc6b703ff3322de07dee442e08e1c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:50 +1000 Subject: powerpc/powernv: move opal console flushing to udbg OPAL console writes do not have to synchronously flush firmware / hardware buffers unless they are going through the udbg path. Remove the unconditional flushing from opal_put_chars. Flush if there was no space in the buffer as an optimisation (callers loop waiting for success in that case). udbg flushing is moved to udbg_opal_putc. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 12 +++++++----- drivers/tty/hvc/hvc_opal.c | 5 +++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index e18472757617..cf02e602237a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -400,12 +400,14 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) out: spin_unlock_irqrestore(&opal_write_lock, flags); - /* This is a bit nasty but we need that for the console to - * flush when there aren't any interrupts. We will clean - * things a bit later to limit that to synchronous path - * such as the kernel console and xmon/udbg + /* In the -EAGAIN case, callers loop, so we have to flush the console + * here in case they have interrupts off (and we don't want to wait + * for async flushing if we can make immediate progress here). If + * necessary the API could be made entirely non-flushing if the + * callers had a ->flush API to use. */ - opal_flush_console(vtermno); + if (written == -EAGAIN) + opal_flush_console(vtermno); return written; } diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 9645c0062a90..958c61c8f35a 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -275,6 +275,11 @@ static void udbg_opal_putc(char c) count = hvc_opal_hvsi_put_chars(termno, &c, 1); break; } + + /* This is needed for the cosole to flush + * when there aren't any interrupts. + */ + opal_flush_console(termno); } while(count == 0 || count == -EAGAIN); } -- cgit From 17cc1dd4924391b54fb179d0868b89dc96f2ee80 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:51 +1000 Subject: powerpc/powernv: implement opal_put_chars_atomic The RAW console does not need writes to be atomic, so relax opal_put_chars to be able to do partial writes, and implement an _atomic variant which does not take a spinlock. This API is used in xmon, so the less locking that is used, the better chance there is that a crash can be debugged. Cc: Benjamin Herrenschmidt Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/platforms/powernv/opal.c | 37 +++++++++++++++++++++++++---------- drivers/tty/hvc/hvc_opal.c | 18 ++++++++++++----- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 33ab95a4ac0f..cd1acc8be8ee 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -305,6 +305,7 @@ extern void opal_configure_cores(void); extern int opal_get_chars(uint32_t vtermno, char *buf, int count); extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); +extern int opal_put_chars_atomic(uint32_t vtermno, const char *buf, int total_len); extern int opal_flush_console(uint32_t vtermno); extern void hvc_opal_init_early(void); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index cf02e602237a..6405b6f20193 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -344,9 +344,9 @@ int opal_get_chars(uint32_t vtermno, char *buf, int count) return 0; } -int opal_put_chars(uint32_t vtermno, const char *data, int total_len) +static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic) { - unsigned long flags; + unsigned long flags = 0 /* shut up gcc */; int written; __be64 olen; s64 rc; @@ -354,11 +354,8 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) if (!opal.entry) return -ENODEV; - /* We want put_chars to be atomic to avoid mangling of hvsi - * packets. To do that, we first test for room and return - * -EAGAIN if there isn't enough. - */ - spin_lock_irqsave(&opal_write_lock, flags); + if (atomic) + spin_lock_irqsave(&opal_write_lock, flags); rc = opal_console_write_buffer_space(vtermno, &olen); if (rc || be64_to_cpu(olen) < total_len) { /* Closed -> drop characters */ @@ -391,14 +388,18 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) written = be64_to_cpu(olen); if (written < total_len) { - /* Should not happen */ - pr_warn("atomic console write returned partial len=%d written=%d\n", total_len, written); + if (atomic) { + /* Should not happen */ + pr_warn("atomic console write returned partial " + "len=%d written=%d\n", total_len, written); + } if (!written) written = -EAGAIN; } out: - spin_unlock_irqrestore(&opal_write_lock, flags); + if (atomic) + spin_unlock_irqrestore(&opal_write_lock, flags); /* In the -EAGAIN case, callers loop, so we have to flush the console * here in case they have interrupts off (and we don't want to wait @@ -412,6 +413,22 @@ out: return written; } +int opal_put_chars(uint32_t vtermno, const char *data, int total_len) +{ + return __opal_put_chars(vtermno, data, total_len, false); +} + +/* + * opal_put_chars_atomic will not perform partial-writes. Data will be + * atomically written to the terminal or not at all. This is not strictly + * true at the moment because console space can race with OPAL's console + * writes. + */ +int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len) +{ + return __opal_put_chars(vtermno, data, total_len, true); +} + int opal_flush_console(uint32_t vtermno) { s64 rc; diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 958c61c8f35a..810fd42904c3 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -183,9 +183,15 @@ static int hvc_opal_probe(struct platform_device *dev) return -ENOMEM; pv->proto = proto; hvc_opal_privs[termno] = pv; - if (proto == HV_PROTOCOL_HVSI) - hvsilib_init(&pv->hvsi, opal_get_chars, opal_put_chars, + if (proto == HV_PROTOCOL_HVSI) { + /* + * We want put_chars to be atomic to avoid mangling of + * hvsi packets. + */ + hvsilib_init(&pv->hvsi, + opal_get_chars, opal_put_chars_atomic, termno, 0); + } /* Instanciate now to establish a mapping index==vtermno */ hvc_instantiate(termno, termno, ops); @@ -375,8 +381,9 @@ void __init hvc_opal_init_early(void) else if (of_device_is_compatible(stdout_node,"ibm,opal-console-hvsi")) { hvc_opal_boot_priv.proto = HV_PROTOCOL_HVSI; ops = &hvc_opal_hvsi_ops; - hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars, - opal_put_chars, index, 1); + hvsilib_init(&hvc_opal_boot_priv.hvsi, + opal_get_chars, opal_put_chars_atomic, + index, 1); /* HVSI, perform the handshake now */ hvsilib_establish(&hvc_opal_boot_priv.hvsi); pr_devel("hvc_opal: Found HVSI console\n"); @@ -408,7 +415,8 @@ void __init udbg_init_debug_opal_hvsi(void) hvc_opal_privs[index] = &hvc_opal_boot_priv; hvc_opal_boot_termno = index; udbg_init_opal_common(); - hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars, opal_put_chars, + hvsilib_init(&hvc_opal_boot_priv.hvsi, + opal_get_chars, opal_put_chars_atomic, index, 1); hvsilib_establish(&hvc_opal_boot_priv.hvsi); } -- cgit From cca3d5290ede73b01144aa522910c88b1e02fcef Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 May 2018 00:55:52 +1000 Subject: tty: hvc: remove unexplained "just in case" spin delay This delay was in the very first OPAL console commit 6.5 years ago, and came from the vio hvc driver. The firmware console has hardened sufficiently to remove it. Reviewed-by: Benjamin Herrenschmidt Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/tty/hvc/hvc_opal.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 810fd42904c3..f631f8bee308 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -313,14 +313,8 @@ static int udbg_opal_getc(void) int ch; for (;;) { ch = udbg_opal_getc_poll(); - if (ch == -1) { - /* This shouldn't be needed...but... */ - volatile unsigned long delay; - for (delay=0; delay < 2000000; delay++) - ; - } else { + if (ch != -1) return ch; - } } } -- cgit From db0a2b633da4216b767d7aed95ffe30d37409c7a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:24:51 +0000 Subject: powerpc: remove kdump.h from page.h page.h doesn't need kdump.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/page.h | 1 - arch/powerpc/kernel/crash.c | 1 - arch/powerpc/kernel/machine_kexec.c | 1 + arch/powerpc/kernel/setup_32.c | 1 + 4 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index db7be0779d55..a9fbefaacf10 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -17,7 +17,6 @@ #include #endif #include -#include /* * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 17c8b99680f2..43a3ce2301e8 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index b53401334e81..63f5a9311a29 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 74457485574b..ef747a5a30b9 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,6 +40,7 @@ #include #include #include +#include #define DBG(fmt...) -- cgit From 8c58259bba43084eb5876aeefa574e9344b85ae3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:24:53 +0000 Subject: powerpc: remove unneeded inclusions of cpu_has_feature.h Files not using cpu_has_feature() don't need cpu_has_feature.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cacheflush.h | 1 - arch/powerpc/include/asm/cputime.h | 1 - arch/powerpc/include/asm/dbell.h | 1 - arch/powerpc/kernel/vdso.c | 1 - 4 files changed, 4 deletions(-) diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 0d72ec75da63..d5a8d7bf0759 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -11,7 +11,6 @@ #include #include -#include /* * No cache flushing is required when address mappings are changed, diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index bc4903badb3f..133672744b2e 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -23,7 +23,6 @@ #include #include #include -#include typedef u64 __nocast cputime_t; typedef u64 __nocast cputime64_t; diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index 9f2ae0d25e15..998c42ff1caa 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -16,7 +16,6 @@ #include #include -#include #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d2205b97628c..65b3bdb99f0b 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include -- cgit From 36a7eeaff7d06cef253c8df6dfe363bfc4a553f8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:24:55 +0000 Subject: powerpc/405: move PPC405_ERR77 in asm-405.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-405.h | 19 +++++++++++++++++++ arch/powerpc/include/asm/asm-compat.h | 13 ------------- arch/powerpc/include/asm/atomic.h | 1 + arch/powerpc/include/asm/bitops.h | 1 + arch/powerpc/include/asm/book3s/32/pgtable.h | 2 -- arch/powerpc/include/asm/cmpxchg.h | 1 + arch/powerpc/include/asm/futex.h | 1 + arch/powerpc/include/asm/nohash/32/pgtable.h | 1 + arch/powerpc/include/asm/spinlock.h | 1 + arch/powerpc/kernel/entry_32.S | 1 + arch/powerpc/kernel/head_40x.S | 1 + 11 files changed, 27 insertions(+), 15 deletions(-) create mode 100644 arch/powerpc/include/asm/asm-405.h diff --git a/arch/powerpc/include/asm/asm-405.h b/arch/powerpc/include/asm/asm-405.h new file mode 100644 index 000000000000..7270d3ae7c8e --- /dev/null +++ b/arch/powerpc/include/asm/asm-405.h @@ -0,0 +1,19 @@ +#ifndef _ASM_POWERPC_ASM_405_H +#define _ASM_POWERPC_ASM_405_H + +#include + +#ifdef __KERNEL__ +#ifdef CONFIG_IBM405_ERR77 +/* Erratum #77 on the 405 means we need a sync or dcbt before every + * stwcx. The old ATOMIC_SYNC_FIX covered some but not all of this. + */ +#define PPC405_ERR77(ra,rb) stringify_in_c(dcbt ra, rb;) +#define PPC405_ERR77_SYNC stringify_in_c(sync;) +#else +#define PPC405_ERR77(ra,rb) +#define PPC405_ERR77_SYNC +#endif +#endif + +#endif /* _ASM_POWERPC_ASM_405_H */ diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 7f2a7702596c..d2cf3593e987 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -70,17 +70,4 @@ #endif -#ifdef __KERNEL__ -#ifdef CONFIG_IBM405_ERR77 -/* Erratum #77 on the 405 means we need a sync or dcbt before every - * stwcx. The old ATOMIC_SYNC_FIX covered some but not all of this. - */ -#define PPC405_ERR77(ra,rb) stringify_in_c(dcbt ra, rb;) -#define PPC405_ERR77_SYNC stringify_in_c(sync;) -#else -#define PPC405_ERR77(ra,rb) -#define PPC405_ERR77_SYNC -#endif -#endif - #endif /* _ASM_POWERPC_ASM_COMPAT_H */ diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 682b3e6a1e21..cbdb0b7e60a3 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -10,6 +10,7 @@ #include #include #include +#include #define ATOMIC_INIT(i) { (i) } diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index b750ffef83c7..ff71566dadee 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -45,6 +45,7 @@ #include #include #include +#include /* PPC bit number conversion */ #define PPC_BITLSHIFT(be) (BITS_PER_LONG - 1 - (be)) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 02f5acd7ccc4..3c3e34240628 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -164,7 +164,6 @@ static inline unsigned long pte_update(pte_t *p, 1: lwarx %0,0,%3\n\ andc %1,%0,%4\n\ or %1,%1,%5\n" - PPC405_ERR77(0,%3) " stwcx. %1,0,%3\n\ bne- 1b" : "=&r" (old), "=&r" (tmp), "=m" (*p) @@ -186,7 +185,6 @@ static inline unsigned long long pte_update(pte_t *p, lwzx %0,0,%3\n\ andc %1,%L0,%5\n\ or %1,%1,%6\n" - PPC405_ERR77(0,%3) " stwcx. %1,0,%4\n\ bne- 1b" : "=&r" (old), "=&r" (tmp), "=m" (*p) diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 9b001f1f6b32..67ec1073ac97 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -7,6 +7,7 @@ #include #include #include +#include #ifdef __BIG_ENDIAN #define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 1a944c18c539..76c8648d0fa8 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -9,6 +9,7 @@ #include #include #include +#include #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ __asm__ __volatile ( \ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 7c46a98cc7f4..7df2f3a66cc5 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -9,6 +9,7 @@ #include #include #include /* For sub-arch specific PPC_PIN_SIZE */ +#include extern unsigned long ioremap_bot; diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 72dc4ddc2972..7ec38f4ee927 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 /* use 0x800000yy when locked, where yy == CPU number */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 973577f2141c..7642cb984d3a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -33,6 +33,7 @@ #include #include #include +#include /* * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 41374a468d1c..b19d78410511 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -42,6 +42,7 @@ #include #include #include +#include /* As with the other PowerPC ports, it is expected that when code * execution begins here, the following registers contain valid, yet -- cgit From ec0c464cdbf38bf6ddabec8bfa595bd421cab203 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:24:57 +0000 Subject: powerpc: move ASM_CONST and stringify_in_c() into asm-const.h This patch moves ASM_CONST() and stringify_in_c() into dedicated asm-const.h, then cleans all related inclusions. Signed-off-by: Christophe Leroy [mpe: asm-compat.h should include asm-const.h] Signed-off-by: Michael Ellerman --- arch/powerpc/crypto/md5-asm.S | 1 + arch/powerpc/crypto/sha1-powerpc-asm.S | 1 + arch/powerpc/include/asm/asm-compat.h | 13 +------------ arch/powerpc/include/asm/asm-const.h | 14 ++++++++++++++ arch/powerpc/include/asm/barrier.h | 2 ++ arch/powerpc/include/asm/book3s/64/hash.h | 2 ++ arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +- arch/powerpc/include/asm/book3s/64/radix.h | 2 ++ arch/powerpc/include/asm/cmpxchg.h | 1 - arch/powerpc/include/asm/code-patching.h | 1 + arch/powerpc/include/asm/cputable.h | 2 +- arch/powerpc/include/asm/dt_cpu_ftrs.h | 1 - arch/powerpc/include/asm/feature-fixups.h | 2 ++ arch/powerpc/include/asm/firmware.h | 2 +- arch/powerpc/include/asm/futex.h | 1 - arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/include/asm/jump_label.h | 2 +- arch/powerpc/include/asm/mmu-44x.h | 1 + arch/powerpc/include/asm/mmu.h | 2 +- arch/powerpc/include/asm/nohash/64/pgtable.h | 1 + arch/powerpc/include/asm/page.h | 2 +- arch/powerpc/include/asm/page_64.h | 2 ++ arch/powerpc/include/asm/ppc-opcode.h | 2 +- arch/powerpc/include/asm/ptrace.h | 1 + arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/include/asm/reg_a2.h | 2 ++ arch/powerpc/include/asm/spinlock.h | 1 - arch/powerpc/include/asm/synch.h | 1 + arch/powerpc/include/asm/thread_info.h | 2 ++ arch/powerpc/include/asm/uaccess.h | 1 - arch/powerpc/kernel/entry_64.S | 1 + arch/powerpc/kernel/fpu.S | 1 + arch/powerpc/kernel/idle_book3s.S | 1 + arch/powerpc/kernel/kvm_emul.S | 1 + arch/powerpc/kernel/ppc_save_regs.S | 1 + arch/powerpc/kernel/vector.S | 1 + arch/powerpc/kvm/book3s_64_slb.S | 2 ++ arch/powerpc/kvm/book3s_hv_interrupts.S | 1 + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 1 + arch/powerpc/kvm/book3s_interrupts.S | 1 + arch/powerpc/kvm/book3s_rmhandlers.S | 1 + arch/powerpc/kvm/book3s_segment.S | 2 ++ arch/powerpc/lib/copyuser_64.S | 1 + arch/powerpc/lib/feature-fixups-test.S | 1 + arch/powerpc/lib/ldstfp.S | 1 + arch/powerpc/lib/memcpy_64.S | 1 + arch/powerpc/mm/tlb_nohash_low.S | 1 + arch/powerpc/net/bpf_jit32.h | 1 + arch/powerpc/net/bpf_jit_asm.S | 1 + arch/powerpc/net/bpf_jit_comp.c | 1 + arch/powerpc/net/bpf_jit_comp64.c | 1 + arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/pseries/setup.c | 1 + arch/powerpc/purgatory/trampoline.S | 10 +--------- arch/powerpc/xmon/spr_access.S | 1 + tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h | 0 tools/testing/selftests/powerpc/primitives/asm/asm-const.h | 1 + 57 files changed, 73 insertions(+), 33 deletions(-) create mode 100644 arch/powerpc/include/asm/asm-const.h create mode 100644 tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h create mode 120000 tools/testing/selftests/powerpc/primitives/asm/asm-const.h diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S index 10cdf5bceebb..1834065362c7 100644 --- a/arch/powerpc/crypto/md5-asm.S +++ b/arch/powerpc/crypto/md5-asm.S @@ -11,6 +11,7 @@ */ #include #include +#include #define rHP r3 #define rWP r4 diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S index c8951ce0dcc4..23e248beff71 100644 --- a/arch/powerpc/crypto/sha1-powerpc-asm.S +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S @@ -7,6 +7,7 @@ #include #include +#include #ifdef __BIG_ENDIAN__ #define LWZ(rt, d, ra) \ diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index d2cf3593e987..19b70c5b5f18 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -1,21 +1,10 @@ #ifndef _ASM_POWERPC_ASM_COMPAT_H #define _ASM_POWERPC_ASM_COMPAT_H +#include #include #include -#ifdef __ASSEMBLY__ -# define stringify_in_c(...) __VA_ARGS__ -# define ASM_CONST(x) x -#else -/* This version of stringify will deal with commas... */ -# define __stringify_in_c(...) #__VA_ARGS__ -# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " -# define __ASM_CONST(x) x##UL -# define ASM_CONST(x) __ASM_CONST(x) -#endif - - #ifdef __powerpc64__ /* operations for longs and pointers */ diff --git a/arch/powerpc/include/asm/asm-const.h b/arch/powerpc/include/asm/asm-const.h new file mode 100644 index 000000000000..082c1538c562 --- /dev/null +++ b/arch/powerpc/include/asm/asm-const.h @@ -0,0 +1,14 @@ +#ifndef _ASM_POWERPC_ASM_CONST_H +#define _ASM_POWERPC_ASM_CONST_H + +#ifdef __ASSEMBLY__ +# define stringify_in_c(...) __VA_ARGS__ +# define ASM_CONST(x) x +#else +/* This version of stringify will deal with commas... */ +# define __stringify_in_c(...) #__VA_ARGS__ +# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " +# define __ASM_CONST(x) x##UL +# define ASM_CONST(x) __ASM_CONST(x) +#endif +#endif /* _ASM_POWERPC_ASM_CONST_H */ diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index f67b3f6e36be..de1316874e45 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -5,6 +5,8 @@ #ifndef _ASM_POWERPC_BARRIER_H #define _ASM_POWERPC_BARRIER_H +#include + /* * Memory barrier. * The sync instruction guarantees that all memory accesses initiated diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 0387b155f13d..d52a51b2ce7b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -3,6 +3,8 @@ #define _ASM_POWERPC_BOOK3S_64_HASH_H #ifdef __KERNEL__ +#include + /* * Common bits between 4K and 64K pages in a linux-style PTE. * Additional bits may be defined in pgtable-hash64-*.h diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index eee0b5b8a23f..2f74bdc805e0 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -12,9 +12,9 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include +#include /* * This is necessary to get the definition of PGTABLE_RANGE which we diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 3ab3f7aef022..77440e837869 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_PGTABLE_RADIX_H #define _ASM_POWERPC_PGTABLE_RADIX_H +#include + #ifndef __ASSEMBLY__ #include #endif diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 67ec1073ac97..27183871eb3b 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -5,7 +5,6 @@ #ifdef __KERNEL__ #include #include -#include #include #include diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 812535f40124..9ecc7bfc8ae7 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -14,6 +14,7 @@ #include #include #include +#include /* Flags for create_branch: * "b" == create_branch(addr, target, 0); diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 82816a2043b9..751126c22ed9 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -4,9 +4,9 @@ #include -#include #include #include +#include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/dt_cpu_ftrs.h b/arch/powerpc/include/asm/dt_cpu_ftrs.h index 71515d909ed1..55113432fc91 100644 --- a/arch/powerpc/include/asm/dt_cpu_ftrs.h +++ b/arch/powerpc/include/asm/dt_cpu_ftrs.h @@ -10,7 +10,6 @@ */ #include -#include #include #include diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index fcfd05672b1b..33b6f9c892c8 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -1,6 +1,8 @@ #ifndef __ASM_POWERPC_FEATURE_FIXUPS_H #define __ASM_POWERPC_FEATURE_FIXUPS_H +#include + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 535add3f7791..ce8aab72c21b 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -14,8 +14,8 @@ #ifdef __KERNEL__ -#include #include +#include /* firmware feature bitmask values */ diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 76c8648d0fa8..94542776a62d 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index daa3ee5d7ad2..ab3a4fba38e3 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -30,6 +30,7 @@ #include #include #include +#include #define IOMMU_PAGE_SHIFT_4K 12 #define IOMMU_PAGE_SIZE_4K (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K) diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 9a287e0ac8b1..a3b2cf940b4e 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -14,7 +14,7 @@ #include #include -#include +#include #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index cb57f29f531d..9bdbe1d1c9b9 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -6,6 +6,7 @@ */ #include +#include #define PPC44x_MMUCR_TID 0x000000ff #define PPC44x_MMUCR_STS 0x00010000 diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 61d15ce92278..8418d83b5eb0 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -5,8 +5,8 @@ #include -#include #include +#include /* * MMU features bit definitions diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index dd0c7236208f..fe05b3e03cf1 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -8,6 +8,7 @@ #include #include +#include #ifdef CONFIG_PPC_64K_PAGES #error "Page size not supported" diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index a9fbefaacf10..f6a1265face2 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -16,7 +16,7 @@ #else #include #endif -#include +#include /* * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index af04acdb873f..c0ce17e909ef 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -10,6 +10,8 @@ * 2 of the License, or (at your option) any later version. */ +#include + /* * We always define HW_PAGE_SHIFT to 12 as use of 64K pages remains Linux * specific, every notion of page number shared with the firmware, TCEs, diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c103caf99897..954edf935158 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -13,7 +13,7 @@ #define _ASM_POWERPC_PPC_OPCODE_H #include -#include +#include #define __REG_R0 0 #define __REG_R1 1 diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index e4923686e43a..447cbd1bee99 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -24,6 +24,7 @@ #define _ASM_POWERPC_PTRACE_H #include +#include #ifdef __powerpc64__ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 562568414cf4..d4a8dc71a057 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -13,6 +13,7 @@ #include #include +#include /* Pickup Book E specific registers. */ #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h index 3ba9c6f096fc..74c2c57c492a 100644 --- a/arch/powerpc/include/asm/reg_a2.h +++ b/arch/powerpc/include/asm/reg_a2.h @@ -12,6 +12,8 @@ #ifndef __ASM_POWERPC_REG_A2_H__ #define __ASM_POWERPC_REG_A2_H__ +#include + #define SPRN_TENSR 0x1b5 #define SPRN_TENS 0x1b6 /* Thread ENable Set */ #define SPRN_TENC 0x1b7 /* Thread ENable Clear */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 7ec38f4ee927..685c72310f5d 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -24,7 +24,6 @@ #include #include #endif -#include #include #include #include diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index 6ec546090ba1..f6f8c75bbb24 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -5,6 +5,7 @@ #include #include +#include #ifndef __ASSEMBLY__ extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index f308dfeb2746..ae554b6fe6b9 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -9,6 +9,8 @@ #ifndef _ASM_POWERPC_THREAD_INFO_H #define _ASM_POWERPC_THREAD_INFO_H +#include + #ifdef __KERNEL__ #define THREAD_SHIFT CONFIG_THREAD_SHIFT diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 468653ce844c..643cfbd5bcb5 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -2,7 +2,6 @@ #ifndef _ARCH_POWERPC_UACCESS_H #define _ARCH_POWERPC_UACCESS_H -#include #include #include #include diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0357f87a013c..f62d9ddc0312 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_BOOK3S #include #else diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6c509f39bbde..07c913fd5aba 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -25,6 +25,7 @@ #include #include #include +#include #ifdef CONFIG_VSX #define __REST_32FPVSRS(n,c,base) \ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index d85d5515a091..436caa9d6eec 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -23,6 +23,7 @@ #include #include #include +#include #undef DEBUG diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index e100ff324a85..c005088f6c9c 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -23,6 +23,7 @@ #include #include #include +#include #define KVM_MAGIC_PAGE (-4096) diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 8afbe213d729..6d1b42ee797c 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -12,6 +12,7 @@ #include #include #include +#include /* * Grab the register values as they are now. diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index f314fd475491..21165da0052d 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -8,6 +8,7 @@ #include #include #include +#include /* * Load state from memory into VMX registers including VSCR. diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 688722acd692..d293485c1a60 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -17,6 +17,8 @@ * Authors: Alexander Graf */ +#include + #define SHADOW_SLB_ENTRY_LEN 0x10 #define OFFSET_ESID(x) (SHADOW_SLB_ENTRY_LEN * x) #define OFFSET_VSID(x) ((SHADOW_SLB_ENTRY_LEN * x) + 8) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 82f2ff9410b6..4218073eea1f 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -27,6 +27,7 @@ #include #include #include +#include /***************************************************************************** * * diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 6e4554b273f1..7405222a4e28 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,6 +32,7 @@ #include #include #include +#include /* Sign-extend HDEC if not on POWER9 */ #define EXTEND_HDEC(reg) \ diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index c18e845019ec..d71dab16dc6f 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -23,6 +23,7 @@ #include #include #include +#include #if defined(CONFIG_PPC_BOOK3S_64) #ifdef PPC64_ELF_ABI_v2 diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 34a5adeff084..b0089e04c8c8 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_BOOK3S_64 #include diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 98ccc7ec5d48..7fec258bb072 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -19,6 +19,8 @@ /* Real mode helpers */ +#include + #if defined(CONFIG_PPC_BOOK3S_64) #define GET_SHADOW_VCPU(reg) \ diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 506677395681..65244263b6a3 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef __BIG_ENDIAN__ #define sLd sld /* Shift towards low-numbered address. */ diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S index f16cec989506..ee7c5fd5fc64 100644 --- a/arch/powerpc/lib/feature-fixups-test.S +++ b/arch/powerpc/lib/feature-fixups-test.S @@ -11,6 +11,7 @@ #include #include #include +#include .text diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index ae15eba49c1f..32e91994b6b2 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -15,6 +15,7 @@ #include #include #include +#include #include #ifdef CONFIG_PPC_FPU diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 8d8265be1a59..26ea02b7311f 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -9,6 +9,7 @@ #include #include #include +#include .align 7 _GLOBAL_TOC(memcpy) diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 048b8e9f4492..505a3d010c47 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -34,6 +34,7 @@ #include #include #include +#include #if defined(CONFIG_40x) diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index a8cd7e289ecd..6f4daacad296 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -13,6 +13,7 @@ #ifndef _BPF_JIT32_H #define _BPF_JIT32_H +#include #include "bpf_jit.h" #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S index 3dd9c43d40c9..c80280dc2e04 100644 --- a/arch/powerpc/net/bpf_jit_asm.S +++ b/arch/powerpc/net/bpf_jit_asm.S @@ -10,6 +10,7 @@ */ #include +#include #include "bpf_jit32.h" /* diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 5b061fc81df3..d5bfe24bb3b5 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 380cbf9a40d9..b8de5244a58c 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index a8d9b4089c31..4016e3c3d18b 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -14,6 +14,7 @@ #include #include #include +#include .section ".text" diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 139f0af6c3d9..e14ccf32a97d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -69,6 +69,7 @@ #include #include #include +#include #include "pseries.h" diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index 4aad9dd10ace..1e1129553fd7 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -12,15 +12,7 @@ * Software Foundation (version 2 of the License). */ -#if defined(__LITTLE_ENDIAN__) -#define STWX_BE stwbrx -#define LWZX_BE lwbrx -#elif defined(__BIG_ENDIAN__) -#define STWX_BE stwx -#define LWZX_BE lwzx -#else -#error no endianness defined! -#endif +#include .machine ppc64 .balign 256 diff --git a/arch/powerpc/xmon/spr_access.S b/arch/powerpc/xmon/spr_access.S index 4099cbcddaaa..720a52afdd58 100644 --- a/arch/powerpc/xmon/spr_access.S +++ b/arch/powerpc/xmon/spr_access.S @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include /* unsigned long xmon_mfspr(sprn, default_value) */ _GLOBAL(xmon_mfspr) diff --git a/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-const.h b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h new file mode 120000 index 000000000000..18d8be13e67f --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h @@ -0,0 +1 @@ +../../../../../../arch/powerpc/include/asm/asm-const.h \ No newline at end of file -- cgit From 5c35a02c545a7bbe77f3a1ae337d9e29beed079b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:24:59 +0000 Subject: powerpc: clean the inclusion of stringify.h Only include linux/stringify.h is files using __stringify() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dcr-native.h | 1 + arch/powerpc/include/asm/ppc-opcode.h | 1 - arch/powerpc/include/asm/reg_fsl_emb.h | 2 ++ arch/powerpc/include/asm/synch.h | 1 - arch/powerpc/include/asm/thread_info.h | 1 - arch/powerpc/kernel/prom.c | 1 - arch/powerpc/kernel/prom_init.c | 1 - arch/powerpc/kvm/book3s_64_vio_hv.c | 1 + arch/powerpc/lib/locks.c | 1 - arch/powerpc/perf/req-gen/_begin.h | 2 ++ arch/powerpc/perf/req-gen/perf.h | 1 + arch/powerpc/platforms/cell/cbe_thermal.c | 1 + arch/powerpc/platforms/cell/spufs/sputrace.h | 1 + arch/powerpc/platforms/powernv/vas.h | 1 + arch/powerpc/platforms/pseries/mobility.c | 1 + 15 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index 4a2beef74277..151dff555f50 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -25,6 +25,7 @@ #include #include #include +#include typedef struct { unsigned int base; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 954edf935158..665af14850e4 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -12,7 +12,6 @@ #ifndef _ASM_POWERPC_PPC_OPCODE_H #define _ASM_POWERPC_PPC_OPCODE_H -#include #include #define __REG_R0 0 diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h index d7ccf93e6279..a21f529c43d9 100644 --- a/arch/powerpc/include/asm/reg_fsl_emb.h +++ b/arch/powerpc/include/asm/reg_fsl_emb.h @@ -7,6 +7,8 @@ #ifndef __ASM_POWERPC_REG_FSL_EMB_H__ #define __ASM_POWERPC_REG_FSL_EMB_H__ +#include + #ifndef __ASSEMBLY__ /* Performance Monitor Registers */ #define mfpmr(rn) ({unsigned int rval; \ diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index f6f8c75bbb24..aca70fb43147 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -3,7 +3,6 @@ #define _ASM_POWERPC_SYNCH_H #ifdef __KERNEL__ -#include #include #include diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index ae554b6fe6b9..3c0002044bc9 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -27,7 +27,6 @@ #include #include #include -#include #include /* diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8f32f14ba508..c4d7078e5295 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index c45fb463c9e5..0433bf24a10d 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index d4bcd1b17b09..12318fa62655 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index b7b1237d4aa6..35a0ef932e1a 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -15,7 +15,6 @@ #include #include #include -#include #include /* waiting for a spinlock... */ diff --git a/arch/powerpc/perf/req-gen/_begin.h b/arch/powerpc/perf/req-gen/_begin.h index 549f8782c52d..a200b86eba3b 100644 --- a/arch/powerpc/perf/req-gen/_begin.h +++ b/arch/powerpc/perf/req-gen/_begin.h @@ -3,6 +3,8 @@ #ifndef POWERPC_PERF_REQ_GEN_H_ #define POWERPC_PERF_REQ_GEN_H_ +#include + #define CAT2_STR_(t, s) __stringify(t/s) #define CAT2_STR(t, s) CAT2_STR_(t, s) #define I(...) __VA_ARGS__ diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h index 871a9a1766c2..fa9bc804e67a 100644 --- a/arch/powerpc/perf/req-gen/perf.h +++ b/arch/powerpc/perf/req-gen/perf.h @@ -3,6 +3,7 @@ #define LINUX_POWERPC_PERF_REQ_GEN_PERF_H_ #include +#include #ifndef REQUEST_FILE #error "REQUEST_FILE must be defined before including" diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 2c15ff094483..55aac74e1cb9 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h index d557e999b662..1def11e911ac 100644 --- a/arch/powerpc/platforms/cell/spufs/sputrace.h +++ b/arch/powerpc/platforms/cell/spufs/sputrace.h @@ -3,6 +3,7 @@ #define _TRACE_SPUFS_H #include +#include #undef TRACE_SYSTEM #define TRACE_SYSTEM spufs diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index ae0100fd35bb..f5493dbdd7ff 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -15,6 +15,7 @@ #include #include #include +#include /* * Overview of Virtual Accelerator Switchboard (VAS). diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 8a8033a249c7..f0e30dc94988 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include -- cgit From 2c86cd188f8a5631f3d75a1dea14d22df85189b4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:01 +0000 Subject: powerpc: clean inclusions of asm/feature-fixups.h files not using feature fixup don't need asm/feature-fixups.h files using feature fixup need asm/feature-fixups.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 1 - arch/powerpc/include/asm/dbell.h | 1 + arch/powerpc/include/asm/dt_cpu_ftrs.h | 1 - arch/powerpc/include/asm/exception-64s.h | 1 + arch/powerpc/include/asm/firmware.h | 1 - arch/powerpc/include/asm/kvm_booke_hv_asm.h | 2 ++ arch/powerpc/include/asm/mmu.h | 1 - arch/powerpc/include/asm/ppc_asm.h | 1 + arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/cpu_setup_6xx.S | 1 + arch/powerpc/kernel/entry_32.S | 1 + arch/powerpc/kernel/entry_64.S | 1 + arch/powerpc/kernel/exceptions-64e.S | 1 + arch/powerpc/kernel/exceptions-64s.S | 1 + arch/powerpc/kernel/fpu.S | 1 + arch/powerpc/kernel/head_32.S | 1 + arch/powerpc/kernel/head_64.S | 1 + arch/powerpc/kernel/head_fsl_booke.S | 1 + arch/powerpc/kernel/idle_6xx.S | 1 + arch/powerpc/kernel/idle_book3s.S | 1 + arch/powerpc/kernel/idle_e500.S | 1 + arch/powerpc/kernel/idle_power4.S | 1 + arch/powerpc/kernel/l2cr_6xx.S | 1 + arch/powerpc/kernel/misc_32.S | 1 + arch/powerpc/kernel/misc_64.S | 1 + arch/powerpc/kernel/setup_32.c | 1 + arch/powerpc/kernel/setup_64.c | 1 + arch/powerpc/kernel/swsusp_32.S | 1 + arch/powerpc/kernel/swsusp_asm64.S | 1 + arch/powerpc/kernel/tm.S | 1 + arch/powerpc/kvm/book3s_64_slb.S | 1 + arch/powerpc/kvm/book3s_hv_interrupts.S | 1 + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 1 + arch/powerpc/kvm/book3s_segment.S | 1 + arch/powerpc/lib/copypage_64.S | 1 + arch/powerpc/lib/copyuser_64.S | 1 + arch/powerpc/lib/hweight_64.S | 1 + arch/powerpc/lib/memcpy_64.S | 1 + arch/powerpc/mm/hash_low_32.S | 1 + arch/powerpc/mm/hash_native_64.c | 1 + arch/powerpc/mm/slb_low.S | 1 + arch/powerpc/mm/tlb_low_64e.S | 1 + arch/powerpc/mm/tlb_nohash_low.S | 1 + arch/powerpc/platforms/powermac/cache.S | 1 + arch/powerpc/platforms/powermac/sleep.S | 1 + arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/pseries/hvCall.S | 1 + tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h | 0 tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h | 1 + 49 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h create mode 120000 tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 751126c22ed9..29f49a35d6ee 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -4,7 +4,6 @@ #include -#include #include #include diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index 998c42ff1caa..99b84db23e8c 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -16,6 +16,7 @@ #include #include +#include #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) diff --git a/arch/powerpc/include/asm/dt_cpu_ftrs.h b/arch/powerpc/include/asm/dt_cpu_ftrs.h index 55113432fc91..0c729e2d0e8a 100644 --- a/arch/powerpc/include/asm/dt_cpu_ftrs.h +++ b/arch/powerpc/include/asm/dt_cpu_ftrs.h @@ -10,7 +10,6 @@ */ #include -#include #include #ifdef CONFIG_PPC_DT_CPU_FTRS diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index c40b4380951c..1f2efc1a9769 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -35,6 +35,7 @@ * implementations as possible. */ #include +#include /* PACA save area offsets (exgen, exmc, etc) */ #define EX_R9 0 diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index ce8aab72c21b..7a051bd21f87 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -14,7 +14,6 @@ #ifdef __KERNEL__ -#include #include /* firmware feature bitmask values */ diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h index e5f048bbcb7c..931260b59ac6 100644 --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -9,6 +9,8 @@ #ifndef ASM_KVM_BOOKE_HV_ASM_H #define ASM_KVM_BOOKE_HV_ASM_H +#include + #ifdef __ASSEMBLY__ /* diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 8418d83b5eb0..13ea441ac531 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -5,7 +5,6 @@ #include -#include #include /* diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 75ece56dcd62..b5d023680801 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index d4a8dc71a057..486b7c83b8c5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -14,6 +14,7 @@ #include #include #include +#include /* Pickup Book E specific registers. */ #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index a9f3970693e1..fa3c2c91290c 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -16,6 +16,7 @@ #include #include #include +#include _GLOBAL(__setup_cpu_603) mflr r5 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 7642cb984d3a..3bd097be90d9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -34,6 +34,7 @@ #include #include #include +#include /* * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index f62d9ddc0312..a9e74ecdab0b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -44,6 +44,7 @@ #else #include #endif +#include /* * System calls. diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 3325f721e7b2..6d6e144a28ce 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -27,6 +27,7 @@ #include #include #include +#include /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 36aec1f1cb2d..7a672dafd94f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -18,6 +18,7 @@ #include #include #include +#include /* * There are a few constraints to be concerned with. diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 07c913fd5aba..529dcc21c3f9 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_VSX #define __REST_32FPVSRS(n,c,base) \ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 29b2fed93289..61ca27929355 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -35,6 +35,7 @@ #include #include #include +#include /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 6eca15f25c73..4898e9491a1c 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -44,6 +44,7 @@ #include #include #include +#include /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index bf4c6021515f..e2750b856c8f 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -43,6 +43,7 @@ #include #include #include +#include #include "head_booke.h" /* As with the other PowerPC ports, it is expected that when code diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 1686916cc7f0..ff026c9d3cab 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -20,6 +20,7 @@ #include #include #include +#include .text diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 436caa9d6eec..b107e3df7f8f 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -24,6 +24,7 @@ #include #include #include +#include #undef DEBUG diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index b9b6ef510be1..583e55ac7d26 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -17,6 +17,7 @@ #include #include #include +#include .text diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index 08faa93755f9..dd7471fe20bd 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -16,6 +16,7 @@ #include #include #include +#include #undef DEBUG diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 6408f09dbbd9..6e7dbb7d527c 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -45,6 +45,7 @@ #include #include #include +#include /* Usage: diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 3f7a9a2d2435..695b24a2d954 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -34,6 +34,7 @@ #include #include #include +#include .text diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index fa267e94090a..262ba9481781 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -28,6 +28,7 @@ #include #include #include +#include .text diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ef747a5a30b9..0e3743343280 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -41,6 +41,7 @@ #include #include #include +#include #define DBG(fmt...) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 225bc5f91049..6a501b25dd85 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -68,6 +68,7 @@ #include #include #include +#include #include "setup.h" diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index 34b73a262709..7a919e9a3400 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -7,6 +7,7 @@ #include #include #include +#include /* * Structure for storing CPU registers on the save area. diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 82d8aae81c6a..f83bf6f72cb0 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -13,6 +13,7 @@ #include #include #include +#include /* * Structure for storing CPU registers on the save area. diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 6bb6f5123dcf..6bffbc5affe7 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_VSX /* See fpu.S, this is borrowed from there */ diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index d293485c1a60..066c665dc86f 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -18,6 +18,7 @@ */ #include +#include #define SHADOW_SLB_ENTRY_LEN 0x10 #define OFFSET_ESID(x) (SHADOW_SLB_ENTRY_LEN * x) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 4218073eea1f..666b91c79eb4 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -28,6 +28,7 @@ #include #include #include +#include /***************************************************************************** * * diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 7405222a4e28..1d14046124a0 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -33,6 +33,7 @@ #include #include #include +#include /* Sign-extend HDEC if not on POWER9 */ #define EXTEND_HDEC(reg) \ diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 7fec258bb072..e5c542a7c5ac 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -20,6 +20,7 @@ /* Real mode helpers */ #include +#include #if defined(CONFIG_PPC_BOOK3S_64) diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 8d5034f645f3..694390357667 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -11,6 +11,7 @@ #include #include #include +#include .section ".toc","aw" PPC64_CACHES: diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 65244263b6a3..2d6f128d3ebe 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef __BIG_ENDIAN__ #define sLd sld /* Shift towards low-numbered address. */ diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S index 3de7ac154f24..0526b2225260 100644 --- a/arch/powerpc/lib/hweight_64.S +++ b/arch/powerpc/lib/hweight_64.S @@ -20,6 +20,7 @@ #include #include #include +#include /* Note: This code relies on -mminimal-toc */ diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 26ea02b7311f..94650d6eae9c 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -10,6 +10,7 @@ #include #include #include +#include .align 7 _GLOBAL_TOC(memcpy) diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index ffbd7c0bda96..26acf6c8c20c 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_SMP .section .bss diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ffbd5ed4e8de..fc5dbbfd09fe 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -30,6 +30,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index a83fbd2a4a24..4ac5057ad439 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -22,6 +22,7 @@ #include #include #include +#include /* * This macro generates asm code to compute the VSID scramble diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index eb82d787d99a..7fd20c52a8ec 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_64K_PAGES #define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1) diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 505a3d010c47..e066a658acac 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -35,6 +35,7 @@ #include #include #include +#include #if defined(CONFIG_40x) diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index cc5347eb1662..27862feee4a5 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -17,6 +17,7 @@ #include #include #include +#include /* * Flush and disable all data caches (dL1, L2, L3). This is used diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 1c2802fabd57..f89808b9713d 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -18,6 +18,7 @@ #include #include #include +#include #define MAGIC 0x4c617273 /* 'Lars' */ diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 4016e3c3d18b..3508be7d758a 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -15,6 +15,7 @@ #include #include #include +#include .section ".text" diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c511a1743a44..d91412c591ef 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -13,6 +13,7 @@ #include #include #include +#include .section ".text" diff --git a/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h new file mode 120000 index 000000000000..8dc6d4d46e8e --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h @@ -0,0 +1 @@ +../../../../../../arch/powerpc/include/asm/feature-fixups.h \ No newline at end of file -- cgit From e8cb7a55eb8dcf65838d0911dc7ba00b7d7accf5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:03 +0000 Subject: powerpc: remove superflous inclusions of asm/fixmap.h Files not using fixmap consts or functions don't need asm/fixmap.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/fixmap.h | 2 -- arch/powerpc/kernel/head_8xx.S | 1 - arch/powerpc/mm/dump_hashpagetable.c | 1 - arch/powerpc/sysdev/cpm_common.c | 1 - 4 files changed, 5 deletions(-) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 6c40dfda5912..40efdf1d2d6e 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -15,9 +15,7 @@ #define _ASM_FIXMAP_H #ifndef __ASSEMBLY__ -#include #include -#include #ifdef CONFIG_HIGHMEM #include #include diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 04501b65040e..6582f824d620 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -30,7 +30,6 @@ #include #include #include -#include #include #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index d241cb6518da..869294695048 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index b74508175b67..010975c3422f 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include -- cgit From b5ac51d747122f8858bdcb3fc7a5c702ef06f6c5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:05 +0000 Subject: powerpc: declare set_breakpoint() static set_breakpoint() is only used in process.c so make it static Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/debug.h | 1 - arch/powerpc/kernel/process.c | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index ce5da214ffe5..7756026b95ca 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -45,7 +45,6 @@ static inline int debugger_break_match(struct pt_regs *regs) { return 0; } static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif -void set_breakpoint(struct arch_hw_breakpoint *brk); void __set_breakpoint(struct arch_hw_breakpoint *brk); bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e9533b4d2f08..2172f9908633 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -717,6 +717,13 @@ void switch_booke_debug_regs(struct debug_reg *new_debug) EXPORT_SYMBOL_GPL(switch_booke_debug_regs); #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ #ifndef CONFIG_HAVE_HW_BREAKPOINT +static void set_breakpoint(struct arch_hw_breakpoint *brk) +{ + preempt_disable(); + __set_breakpoint(brk); + preempt_enable(); +} + static void set_debug_reg_defaults(struct thread_struct *thread) { thread->hw_brk.address = 0; @@ -829,13 +836,6 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) WARN_ON_ONCE(1); } -void set_breakpoint(struct arch_hw_breakpoint *brk) -{ - preempt_disable(); - __set_breakpoint(brk); - preempt_enable(); -} - /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { -- cgit From 6b622669119e20a53a1983cd41115e6895bb85a2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:07 +0000 Subject: powerpc/book3s: Remove PPC_PIN_SIZE PPC_PIN_SIZE is specific to the 44x and is defined in mmu.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgtable.h | 5 ----- arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 3c3e34240628..751cf931bb3f 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -84,17 +84,12 @@ * of RAM. -- Cort */ #define VMALLOC_OFFSET (0x1000000) /* 16M */ -#ifdef PPC_PIN_SIZE -#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) -#else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) -#endif #define VMALLOC_END ioremap_bot #ifndef __ASSEMBLY__ #include #include -#include /* For sub-arch specific PPC_PIN_SIZE */ extern unsigned long ioremap_bot; diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 7df2f3a66cc5..79805e0dad27 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -8,7 +8,7 @@ #ifndef __ASSEMBLY__ #include #include -#include /* For sub-arch specific PPC_PIN_SIZE */ +#include /* For sub-arch specific PPC_PIN_SIZE */ #include extern unsigned long ioremap_bot; -- cgit From 62b8426578c414c918468ab4cc7517da7adc31d5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:09 +0000 Subject: powerpc: fix includes in asm/processor.h Remove superflous includes and add missing ones Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_breakpoint.h | 1 + arch/powerpc/include/asm/processor.h | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 8e7b09703ca4..3637588d3f6d 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -55,6 +55,7 @@ struct arch_hw_breakpoint { struct perf_event; struct pmu; struct perf_sample_data; +struct task_struct; #define HW_BREAKPOINT_ALIGN 0x7 diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 5debe337ea9d..52fadded5c1e 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -39,10 +39,9 @@ #endif /* CONFIG_PPC64 */ #ifndef __ASSEMBLY__ -#include -#include +#include +#include #include -#include #include /* We do _not_ want to define new machine types at all, those must die -- cgit From 0c295d0e9c9f7a592f230bbcf51655bc6c9351ba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:11 +0000 Subject: powerpc/nohash: fix hash related comments in pgtable.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/pgtable.h | 4 ---- arch/powerpc/include/asm/nohash/64/pgtable.h | 18 ++++-------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 79805e0dad27..a507a65b0866 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -223,10 +223,6 @@ static inline unsigned long long pte_update(pte_t *p, } #endif /* CONFIG_PTE_64BIT */ -/* - * 2.6 calls this without flushing the TLB entry; this is wrong - * for our hash-based implementation, we fix that up here. - */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) { diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index fe05b3e03cf1..7cd6809f4d33 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -3,7 +3,7 @@ #define _ASM_POWERPC_NOHASH_64_PGTABLE_H /* * This file contains the functions and defines necessary to modify and use - * the ppc64 hashed page table. + * the ppc64 non-hashed page table. */ #include @@ -38,7 +38,7 @@ /* * The vmalloc space starts at the beginning of that region, and - * occupies half of it on hash CPUs and a quarter of it on Book3E + * occupies a quarter of it on Book3E * (we keep a quarter for the virtual memmap) */ #define VMALLOC_START KERN_VIRT_START @@ -78,7 +78,7 @@ /* * Defines the address of the vmemap area, in its own region on - * hash table CPUs and after the vmalloc space on Book3E + * after the vmalloc space on Book3E */ #define VMEMMAP_BASE VMALLOC_END #define VMEMMAP_END KERN_IO_START @@ -248,14 +248,6 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, pte_update(mm, addr, ptep, _PAGE_RW, 0, 1); } -/* - * We currently remove entries from the hashtable regardless of whether - * the entry was young or dirty. The generic routines only flush if the - * entry was young or dirty which is not good enough. - * - * We should be more intelligent about this but for the moment we override - * these functions and force a tlb flush unconditionally - */ #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH #define ptep_clear_flush_young(__vma, __address, __ptep) \ ({ \ @@ -279,9 +271,7 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, } -/* Set the dirty and/or accessed bits atomically in a linux PTE, this - * function doesn't need to flush the hash entry - */ +/* Set the dirty and/or accessed bits atomically in a linux PTE */ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t entry, unsigned long address, -- cgit From 7bc396958cafba126078ad92480a4241599ae4ef Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:13 +0000 Subject: powerpc/44x: remove page.h from mmu-44x.h mmu-44x.h doesn't need asm/page.h if PAGE_SHIFT are replaced by CONFIG_PPC_XX_PAGES Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu-44x.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index 9bdbe1d1c9b9..295b3dbb2698 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -5,7 +5,6 @@ * PPC440 support */ -#include #include #define PPC44x_MMUCR_TID 0x000000ff @@ -125,19 +124,19 @@ typedef struct { /* Size of the TLBs used for pinning in lowmem */ #define PPC_PIN_SIZE (1 << 28) /* 256M */ -#if (PAGE_SHIFT == 12) +#if defined(CONFIG_PPC_4K_PAGES) #define PPC44x_TLBE_SIZE PPC44x_TLB_4K #define PPC47x_TLBE_SIZE PPC47x_TLB0_4K #define mmu_virtual_psize MMU_PAGE_4K -#elif (PAGE_SHIFT == 14) +#elif defined(CONFIG_PPC_16K_PAGES) #define PPC44x_TLBE_SIZE PPC44x_TLB_16K #define PPC47x_TLBE_SIZE PPC47x_TLB0_16K #define mmu_virtual_psize MMU_PAGE_16K -#elif (PAGE_SHIFT == 16) +#elif defined(CONFIG_PPC_64K_PAGES) #define PPC44x_TLBE_SIZE PPC44x_TLB_64K #define PPC47x_TLBE_SIZE PPC47x_TLB0_64K #define mmu_virtual_psize MMU_PAGE_64K -#elif (PAGE_SHIFT == 18) +#elif defined(CONFIG_PPC_256K_PAGES) #define PPC44x_TLBE_SIZE PPC44x_TLB_256K #define mmu_virtual_psize MMU_PAGE_256K #else -- cgit From 45ef5992e06dcc3a4c7d34d23052289c7676d56c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:19 +0000 Subject: powerpc: remove unnecessary inclusion of asm/tlbflush.h asm/tlbflush.h is only needed for: - using functions xxx_flush_tlb_xxx() - using MMU_NO_CONTEXT - including asm-generic/pgtable.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/highmem.h | 2 +- arch/powerpc/include/asm/tlb.h | 1 - arch/powerpc/kvm/book3s.c | 1 - arch/powerpc/kvm/book3s_32_mmu.c | 1 - arch/powerpc/kvm/book3s_64_mmu.c | 1 - arch/powerpc/kvm/book3s_64_mmu_hv.c | 1 - arch/powerpc/kvm/book3s_64_vio.c | 1 - arch/powerpc/kvm/book3s_64_vio_hv.c | 1 - arch/powerpc/kvm/book3s_hv.c | 1 - arch/powerpc/kvm/book3s_hv_rm_mmu.c | 1 - arch/powerpc/kvm/book3s_pr.c | 1 - arch/powerpc/kvm/e500.c | 1 - arch/powerpc/kvm/e500mc.c | 1 - arch/powerpc/kvm/powerpc.c | 1 - arch/powerpc/mm/fault.c | 1 - arch/powerpc/mm/hash_native_64.c | 1 - arch/powerpc/mm/hash_utils_64.c | 1 - arch/powerpc/mm/mmu_context_hash32.c | 1 - arch/powerpc/mm/mmu_decl.h | 1 - arch/powerpc/mm/subpage-prot.c | 1 - arch/powerpc/platforms/pseries/lpar.c | 1 - arch/powerpc/sysdev/cpm1.c | 1 - 22 files changed, 1 insertion(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index cec820f961da..a4b65b186ec6 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 9138baccebb0..6d2ba7c779dc 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -17,7 +17,6 @@ #include #endif #include -#include #ifndef __powerpc64__ #include #include diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index edaf4720d156..87348e498c89 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 45c8ea4a0487..612169988a3d 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -23,7 +23,6 @@ #include #include -#include #include #include diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index cf9d686e8162..c92dd25bed23 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -23,7 +23,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7f3a8cf5d66f..3c0e8fb2b773 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -29,7 +29,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 8b9aaf24b0a2..33b30d901381 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -31,7 +31,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 12318fa62655..757976b3f640 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -28,7 +28,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b568582120a3..06cffc6446fe 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 1f22d9e977d4..a67cf1cdeda4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c3b8006f0eac..47ee43bbd696 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index f9f6468f4171..afd3c255a427 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -21,7 +21,6 @@ #include #include -#include #include #include "../mm/mmu_decl.h" diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index d0b6b5788afc..d31645491a93 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -21,7 +21,6 @@ #include #include -#include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0e8c20c5eaac..3ccc386b380d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b1ca7a0974e3..7d262c6437c4 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index fc5dbbfd09fe..729f02df8290 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 743c8dbe1941..f23a89d8e4ce 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index aa5a7fd89461..921c1e33e941 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -27,7 +27,6 @@ #include #include -#include /* * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index c4c0a09a7775..e5d779eed181 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -19,7 +19,6 @@ * */ #include -#include #include #ifdef CONFIG_PPC_MMU_NOHASH diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 9d16ee251fc0..3327551c8b47 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -17,7 +17,6 @@ #include #include -#include /* * Free all pages allocated for subpage protection maps and pointers. diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 52eeff1297f4..d3992ced0782 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index 5240d3a74a10..4f8dcf124828 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include -- cgit From 405cb4024e52b137685213b377ea3f0ce3f91735 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 5 Jul 2018 16:25:21 +0000 Subject: powerpc: split asm/tlbflush.h Split asm/tlbflush.h into: asm/nohash/tlbflush.h asm/book3s/32/tlbflush.h asm/book3s/64/tlbflush.h (already existing) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 25 ++++++++ arch/powerpc/include/asm/book3s/tlbflush.h | 11 ++++ arch/powerpc/include/asm/nohash/tlbflush.h | 53 +++++++++++++++++ arch/powerpc/include/asm/tlbflush.h | 86 ++------------------------- 4 files changed, 94 insertions(+), 81 deletions(-) create mode 100644 arch/powerpc/include/asm/book3s/32/tlbflush.h create mode 100644 arch/powerpc/include/asm/book3s/tlbflush.h create mode 100644 arch/powerpc/include/asm/nohash/tlbflush.h diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h new file mode 100644 index 000000000000..068085b709fb --- /dev/null +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H +#define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H + +#define MMU_NO_CONTEXT (0) +/* + * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx + */ +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + flush_tlb_page(vma, vmaddr); +} +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ + flush_tlb_mm(mm); +} + +#endif /* _ASM_POWERPC_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/tlbflush.h b/arch/powerpc/include/asm/book3s/tlbflush.h new file mode 100644 index 000000000000..dec11de41055 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/tlbflush.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_TLBFLUSH_H +#define _ASM_POWERPC_BOOK3S_TLBFLUSH_H + +#ifdef CONFIG_PPC64 +#include +#else +#include +#endif + +#endif /* _ASM_POWERPC_BOOK3S_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h new file mode 100644 index 000000000000..b1d8fec29169 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_TLBFLUSH_H +#define _ASM_POWERPC_NOHASH_TLBFLUSH_H + +/* + * TLB flushing: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - local_flush_tlb_mm(mm, full) flushes the specified mm context on + * the local processor + * - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor + * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + * + */ + +/* + * TLB flushing for software loaded TLB chips + * + * TODO: (CONFIG_FSL_BOOKE) determine if flush_tlb_range & + * flush_tlb_kernel_range are best implemented as tlbia vs + * specific tlbie's + */ + +struct vm_area_struct; +struct mm_struct; + +#define MMU_NO_CONTEXT ((unsigned int)-1) + +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); + +extern void local_flush_tlb_mm(struct mm_struct *mm); +extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); + +extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, + int tsize, int ind); + +#ifdef CONFIG_SMP +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, + int tsize, int ind); +#else +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr) +#define __flush_tlb_page(mm,addr,p,i) __local_flush_tlb_page(mm,addr,p,i) +#endif + +#endif /* _ASM_POWERPC_NOHASH_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 7d5a157c7832..61fba43bf8b2 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -1,87 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_POWERPC_TLBFLUSH_H #define _ASM_POWERPC_TLBFLUSH_H -/* - * TLB flushing: - * - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - local_flush_tlb_mm(mm, full) flushes the specified mm context on - * the local processor - * - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor - * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifdef __KERNEL__ - -#ifdef CONFIG_PPC_MMU_NOHASH -/* - * TLB flushing for software loaded TLB chips - * - * TODO: (CONFIG_FSL_BOOKE) determine if flush_tlb_range & - * flush_tlb_kernel_range are best implemented as tlbia vs - * specific tlbie's - */ - -struct vm_area_struct; -struct mm_struct; - -#define MMU_NO_CONTEXT ((unsigned int)-1) - -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end); -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); - -extern void local_flush_tlb_mm(struct mm_struct *mm); -extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); - -extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - int tsize, int ind); - -#ifdef CONFIG_SMP -extern void flush_tlb_mm(struct mm_struct *mm); -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, - int tsize, int ind); -#else -#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) -#define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr) -#define __flush_tlb_page(mm,addr,p,i) __local_flush_tlb_page(mm,addr,p,i) -#endif - -#elif defined(CONFIG_PPC_STD_MMU_32) - -#define MMU_NO_CONTEXT (0) -/* - * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx - */ -extern void flush_tlb_mm(struct mm_struct *mm); -extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr); -extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end); -extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); -static inline void local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - flush_tlb_page(vma, vmaddr); -} -static inline void local_flush_tlb_mm(struct mm_struct *mm) -{ - flush_tlb_mm(mm); -} - -#elif defined(CONFIG_PPC_BOOK3S_64) -#include +#ifdef CONFIG_PPC_BOOK3S +#include #else -#error Unsupported MMU type -#endif +#include +#endif /* !CONFIG_PPC_BOOK3S */ -#endif /*__KERNEL__ */ #endif /* _ASM_POWERPC_TLBFLUSH_H */ -- cgit From a984506c542e26b31cbb446438f8439fa2253b2e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 20 Jul 2018 00:33:16 +1000 Subject: powerpc/mm: Don't report PUDs as memory leaks when using kmemleak Paul Menzel reported that kmemleak was producing reports such as: unreferenced object 0xc0000000f8b80000 (size 16384): comm "init", pid 1, jiffies 4294937416 (age 312.240s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d997deb7>] __pud_alloc+0x80/0x190 [<0000000087f2e8a3>] move_page_tables+0xbac/0xdc0 [<00000000091e51c2>] shift_arg_pages+0xc0/0x210 [<00000000ab88670c>] setup_arg_pages+0x22c/0x2a0 [<0000000060871529>] load_elf_binary+0x41c/0x1648 [<00000000ecd9d2d4>] search_binary_handler.part.11+0xbc/0x280 [<0000000034e0cdd7>] __do_execve_file.isra.13+0x73c/0x940 [<000000005f953a6e>] sys_execve+0x58/0x70 [<000000009700a858>] system_call+0x5c/0x70 Indicating that a PUD was being leaked. However what's really happening is that kmemleak is not able to recognise the references from the PGD to the PUD, because they are not fully qualified pointers. We can confirm that in xmon, eg: Find the task struct for pid 1 "init": 0:mon> P task_struct ->thread.ksp PID PPID S P CMD c0000001fe7c0000 c0000001fe803960 1 0 S 13 systemd Dump virtual address 0 to find the PGD: 0:mon> dv 0 c0000001fe7c0000 pgd @ 0xc0000000f8b01000 Dump the memory of the PGD: 0:mon> d c0000000f8b01000 c0000000f8b01000 00000000f8b90000 0000000000000000 |................| c0000000f8b01010 0000000000000000 0000000000000000 |................| c0000000f8b01020 0000000000000000 0000000000000000 |................| c0000000f8b01030 0000000000000000 00000000f8b80000 |................| ^^^^^^^^^^^^^^^^ There we can see the reference to our supposedly leaked PUD. But because it's missing the leading 0xc, kmemleak won't recognise it. We can confirm it's still in use by translating an address that is mapped via it: 0:mon> dv 7fff94000000 c0000001fe7c0000 pgd @ 0xc0000000f8b01000 pgdp @ 0xc0000000f8b01038 = 0x00000000f8b80000 <-- pudp @ 0xc0000000f8b81ff8 = 0x00000000037c4000 pmdp @ 0xc0000000037c5ca0 = 0x00000000fbd89000 ptep @ 0xc0000000fbd89000 = 0xc0800001d5ce0386 Maps physical address = 0x00000001d5ce0000 Flags = Accessed Dirty Read Write The fix is fairly simple. We need to tell kmemleak to ignore PUD allocations and never report them as leaks. We can also tell it not to scan the PGD, because it will never find pointers in there. However it will still notice if we allocate a PGD and then leak it. Reported-by: Paul Menzel Signed-off-by: Michael Ellerman Tested-by: Paul Menzel Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 01ee40f11f3a..76234a14b97d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -9,6 +9,7 @@ #include #include +#include #include struct vmemmap_backing { @@ -82,6 +83,13 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL)); + /* + * Don't scan the PGD for pointers, it contains references to PUDs but + * those references are not full pointers and so can't be recognised by + * kmemleak. + */ + kmemleak_no_scan(pgd); + /* * With hugetlb, we don't clear the second half of the page table. * If we share the same slab cache with the pmd or pud level table, @@ -110,8 +118,19 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX), - pgtable_gfp_flags(mm, GFP_KERNEL)); + pud_t *pud; + + pud = kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX), + pgtable_gfp_flags(mm, GFP_KERNEL)); + /* + * Tell kmemleak to ignore the PUD, that means don't scan it for + * pointers and don't consider it a leak. PUDs are typically only + * referred to by their PGD, but kmemleak is not able to recognise those + * as pointers, leading to false leak reports. + */ + kmemleak_ignore(pud); + + return pud; } static inline void pud_free(struct mm_struct *mm, pud_t *pud) -- cgit From f7e2a152231f4a0308cc8f9c2296ba4e419ae945 Mon Sep 17 00:00:00 2001 From: Alexey Spirkov Date: Thu, 26 Jul 2018 12:52:50 +0000 Subject: powerpc/44x: Mark mmu_init_secondary() as __init mmu_init_secondary() calls ppc44x_pin_tlb() which is marked __init, leading to a warning: The function mmu_init_secondary() references the function __init ppc44x_pin_tlb(). There's no CPU hotplug support on 44x so mmu_init_secondary() will only be called at boot. Therefore we should mark it as __init. Signed-off-by: Alexey Spirkov [mpe: Flesh out change log details] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/44x_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 82b1ff759e26..12d92518e898 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -229,7 +229,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, } #ifdef CONFIG_SMP -void mmu_init_secondary(int cpu) +void __init mmu_init_secondary(int cpu) { unsigned long addr; unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); -- cgit From 73f4447d43484224d7abfba0d9468de4982ef889 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:18 -0400 Subject: macintosh/via-pmu: Fix section mismatch warning The pmu_init() function has the __init qualifier, but the ops struct that holds a pointer to it does not. This causes a build warning. The driver works fine because the pointer is only dereferenced early. The function is so small that there's negligible benefit from using the __init qualifier. Remove it to fix the warning, consistent with the other ADB drivers. Tested-by: Stan Johnson Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- drivers/macintosh/via-pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 25c1ce811053..f8a2c917201f 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -378,7 +378,7 @@ static int pmu_probe(void) return vias == NULL? -ENODEV: 0; } -static int __init pmu_init(void) +static int pmu_init(void) { if (vias == NULL) return -ENODEV; -- cgit From 576d5290d678a651b9f36050fc1717e0573aca13 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:18 -0400 Subject: macintosh/via-pmu: Add missing mmio accessors Add missing in_8() accessors to init_pmu() and pmu_sr_intr(). This fixes several sparse warnings: drivers/macintosh/via-pmu.c:536:29: warning: dereference of noderef expression drivers/macintosh/via-pmu.c:537:33: warning: dereference of noderef expression drivers/macintosh/via-pmu.c:1455:17: warning: dereference of noderef expression drivers/macintosh/via-pmu.c:1456:69: warning: dereference of noderef expression Tested-by: Stan Johnson Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- drivers/macintosh/via-pmu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index f8a2c917201f..ba41220f618e 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -534,8 +534,9 @@ init_pmu(void) int timeout; struct adb_request req; - out_8(&via[B], via[B] | TREQ); /* negate TREQ */ - out_8(&via[DIRB], (via[DIRB] | TREQ) & ~TACK); /* TACK in, TREQ out */ + /* Negate TREQ. Set TACK to input and TREQ to output. */ + out_8(&via[B], in_8(&via[B]) | TREQ); + out_8(&via[DIRB], (in_8(&via[DIRB]) | TREQ) & ~TACK); pmu_request(&req, NULL, 2, PMU_SET_INTR_MASK, pmu_intr_mask); timeout = 100000; @@ -1418,8 +1419,8 @@ pmu_sr_intr(void) struct adb_request *req; int bite = 0; - if (via[B] & TREQ) { - printk(KERN_ERR "PMU: spurious SR intr (%x)\n", via[B]); + if (in_8(&via[B]) & TREQ) { + printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via[B])); out_8(&via[IFR], SR_INT); return NULL; } -- cgit From 7ad94699a94b388780e40158e6954a22b68e9d20 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:18 -0400 Subject: macintosh/via-pmu: Don't clear shift register interrupt flag twice The shift register interrupt flag gets cleared in via_pmu_interrupt() and once again in pmu_sr_intr(). Fix this theoretical race condition. Tested-by: Stan Johnson Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- drivers/macintosh/via-pmu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index ba41220f618e..c313ddfdb17a 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -1421,7 +1421,6 @@ pmu_sr_intr(void) if (in_8(&via[B]) & TREQ) { printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via[B])); - out_8(&via[IFR], SR_INT); return NULL; } /* The ack may not yet be low when we get the interrupt */ -- cgit From c57902d52e2d61299872ddc89645d3aa299f1b91 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:18 -0400 Subject: macintosh/via-pmu: Enhance state machine with new 'uninitialized' state On 68k Macs, the via/vias pointer can't be used to determine whether the PMU driver has been initialized. For portability, add a new state to indicate that via_find_pmu() succeeded. After via_find_pmu() executes, testing vias == NULL is equivalent to testing via == NULL. Replace these tests with pmu_state == uninitialized which is simpler and more consistent. No functional change. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman --- drivers/macintosh/via-pmu.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index c313ddfdb17a..6a6f1666712e 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -114,6 +114,7 @@ static volatile unsigned char __iomem *via; #define CB1_INT 0x10 /* transition on CB1 input */ static volatile enum pmu_state { + uninitialized = 0, idle, sending, intack, @@ -274,7 +275,7 @@ int __init find_via_pmu(void) u64 taddr; const u32 *reg; - if (via) + if (pmu_state != uninitialized) return 1; vias = of_find_node_by_name(NULL, "via-pmu"); if (vias == NULL) @@ -369,20 +370,19 @@ int __init find_via_pmu(void) fail: of_node_put(vias); vias = NULL; + pmu_state = uninitialized; return 0; } #ifdef CONFIG_ADB static int pmu_probe(void) { - return vias == NULL? -ENODEV: 0; + return pmu_state == uninitialized ? -ENODEV : 0; } static int pmu_init(void) { - if (vias == NULL) - return -ENODEV; - return 0; + return pmu_state == uninitialized ? -ENODEV : 0; } #endif /* CONFIG_ADB */ @@ -397,7 +397,7 @@ static int __init via_pmu_start(void) { unsigned int irq; - if (vias == NULL) + if (pmu_state == uninitialized) return -ENODEV; batt_req.complete = 1; @@ -463,7 +463,7 @@ arch_initcall(via_pmu_start); */ static int __init via_pmu_dev_init(void) { - if (vias == NULL) + if (pmu_state == uninitialized) return -ENODEV; #ifdef CONFIG_PMAC_BACKLIGHT @@ -929,7 +929,7 @@ static int pmu_send_request(struct adb_request *req, int sync) { int i, ret; - if ((vias == NULL) || (!pmu_fully_inited)) { + if (pmu_state == uninitialized || !pmu_fully_inited) { req->complete = 1; return -ENXIO; } @@ -1023,7 +1023,7 @@ static int __pmu_adb_autopoll(int devs) static int pmu_adb_autopoll(int devs) { - if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb) + if (pmu_state == uninitialized || !pmu_fully_inited || !pmu_has_adb) return -ENXIO; adb_dev_map = devs; @@ -1036,7 +1036,7 @@ static int pmu_adb_reset_bus(void) struct adb_request req; int save_autopoll = adb_dev_map; - if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb) + if (pmu_state == uninitialized || !pmu_fully_inited || !pmu_has_adb) return -ENXIO; /* anyone got a better idea?? */ @@ -1072,7 +1072,7 @@ pmu_request(struct adb_request *req, void (*done)(struct adb_request *), va_list list; int i; - if (vias == NULL) + if (pmu_state == uninitialized) return -ENXIO; if (nbytes < 0 || nbytes > 32) { @@ -1097,7 +1097,7 @@ pmu_queue_request(struct adb_request *req) unsigned long flags; int nsend; - if (via == NULL) { + if (pmu_state == uninitialized) { req->complete = 1; return -ENXIO; } @@ -1210,7 +1210,7 @@ pmu_start(void) void pmu_poll(void) { - if (!via) + if (pmu_state == uninitialized) return; if (disable_poll) return; @@ -1220,7 +1220,7 @@ pmu_poll(void) void pmu_poll_adb(void) { - if (!via) + if (pmu_state == uninitialized) return; if (disable_poll) return; @@ -1235,7 +1235,7 @@ pmu_poll_adb(void) void pmu_wait_complete(struct adb_request *req) { - if (!via) + if (pmu_state == uninitialized) return; while((pmu_state != idle && pmu_state != locked) || !req->complete) via_pmu_interrupt(0, NULL); @@ -1251,7 +1251,7 @@ pmu_suspend(void) { unsigned long flags; - if (!via) + if (pmu_state == uninitialized) return; spin_lock_irqsave(&pmu_lock, flags); @@ -1282,7 +1282,7 @@ pmu_resume(void) { unsigned long flags; - if (!via || (pmu_suspended < 1)) + if (pmu_state == uninitialized || pmu_suspended < 1) return; spin_lock_irqsave(&pmu_lock, flags); @@ -1644,7 +1644,7 @@ pmu_enable_irled(int on) { struct adb_request req; - if (vias == NULL) + if (pmu_state == uninitialized) return ; if (pmu_kind == PMU_KEYLARGO_BASED) return ; @@ -1659,7 +1659,7 @@ pmu_restart(void) { struct adb_request req; - if (via == NULL) + if (pmu_state == uninitialized) return; local_irq_disable(); @@ -1684,7 +1684,7 @@ pmu_shutdown(void) { struct adb_request req; - if (via == NULL) + if (pmu_state == uninitialized) return; local_irq_disable(); @@ -1712,7 +1712,7 @@ pmu_shutdown(void) int pmu_present(void) { - return via != NULL; + return pmu_state != uninitialized; } #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32) @@ -2378,7 +2378,7 @@ static struct miscdevice pmu_device = { static int pmu_device_init(void) { - if (!via) + if (pmu_state == uninitialized) return 0; if (misc_register(&pmu_device) < 0) printk(KERN_ERR "via-pmu: cannot register misc device.\n"); -- cgit From c70c35da52c064983199b1b1cbd4daa5a07ef60c Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:18 -0400 Subject: macintosh/via-pmu: Replace via pointer with via1 and via2 pointers On most PowerPC Macs, the PMU driver uses the shift register and IO port B from a single VIA chip. On 68k and early PowerPC PowerBooks, the driver uses the shift register from one VIA chip together with IO port B from another. Replace via with via1 and via2 to accommodate this. For the CONFIG_PPC_PMAC case, set via1 = via2 so there is no change. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman --- drivers/macintosh/via-pmu.c | 142 +++++++++++++++++++++----------------------- 1 file changed, 69 insertions(+), 73 deletions(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 6a6f1666712e..2557f3e49f18 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -76,7 +76,6 @@ #define BATTERY_POLLING_COUNT 2 static DEFINE_MUTEX(pmu_info_proc_mutex); -static volatile unsigned char __iomem *via; /* VIA registers - spaced 0x200 bytes apart */ #define RS 0x200 /* skip between registers */ @@ -145,6 +144,8 @@ static struct device_node *vias; static int pmu_kind = PMU_UNKNOWN; static int pmu_fully_inited; static int pmu_has_adb; +static volatile unsigned char __iomem *via1; +static volatile unsigned char __iomem *via2; static struct device_node *gpio_node; static unsigned char __iomem *gpio_reg; static int gpio_irq = 0; @@ -340,14 +341,14 @@ int __init find_via_pmu(void) } else pmu_kind = PMU_UNKNOWN; - via = ioremap(taddr, 0x2000); - if (via == NULL) { + via1 = via2 = ioremap(taddr, 0x2000); + if (via1 == NULL) { printk(KERN_ERR "via-pmu: Can't map address !\n"); goto fail_via_remap; } - out_8(&via[IER], IER_CLR | 0x7f); /* disable all intrs */ - out_8(&via[IFR], 0x7f); /* clear IFR */ + out_8(&via1[IER], IER_CLR | 0x7f); /* disable all intrs */ + out_8(&via1[IFR], 0x7f); /* clear IFR */ pmu_state = idle; @@ -362,8 +363,8 @@ int __init find_via_pmu(void) return 1; fail_init: - iounmap(via); - via = NULL; + iounmap(via1); + via1 = via2 = NULL; fail_via_remap: iounmap(gpio_reg); gpio_reg = NULL; @@ -437,7 +438,7 @@ static int __init via_pmu_start(void) } /* Enable interrupts */ - out_8(&via[IER], IER_SET | SR_INT | CB1_INT); + out_8(&via1[IER], IER_SET | SR_INT | CB1_INT); pmu_fully_inited = 1; @@ -535,8 +536,8 @@ init_pmu(void) struct adb_request req; /* Negate TREQ. Set TACK to input and TREQ to output. */ - out_8(&via[B], in_8(&via[B]) | TREQ); - out_8(&via[DIRB], (in_8(&via[DIRB]) | TREQ) & ~TACK); + out_8(&via2[B], in_8(&via2[B]) | TREQ); + out_8(&via2[DIRB], (in_8(&via2[DIRB]) | TREQ) & ~TACK); pmu_request(&req, NULL, 2, PMU_SET_INTR_MASK, pmu_intr_mask); timeout = 100000; @@ -1137,7 +1138,7 @@ wait_for_ack(void) * reported */ int timeout = 4000; - while ((in_8(&via[B]) & TACK) == 0) { + while ((in_8(&via2[B]) & TACK) == 0) { if (--timeout < 0) { printk(KERN_ERR "PMU not responding (!ack)\n"); return; @@ -1151,23 +1152,19 @@ wait_for_ack(void) static inline void send_byte(int x) { - volatile unsigned char __iomem *v = via; - - out_8(&v[ACR], in_8(&v[ACR]) | SR_OUT | SR_EXT); - out_8(&v[SR], x); - out_8(&v[B], in_8(&v[B]) & ~TREQ); /* assert TREQ */ - (void)in_8(&v[B]); + out_8(&via1[ACR], in_8(&via1[ACR]) | SR_OUT | SR_EXT); + out_8(&via1[SR], x); + out_8(&via2[B], in_8(&via2[B]) & ~TREQ); /* assert TREQ */ + (void)in_8(&via2[B]); } static inline void recv_byte(void) { - volatile unsigned char __iomem *v = via; - - out_8(&v[ACR], (in_8(&v[ACR]) & ~SR_OUT) | SR_EXT); - in_8(&v[SR]); /* resets SR */ - out_8(&v[B], in_8(&v[B]) & ~TREQ); - (void)in_8(&v[B]); + out_8(&via1[ACR], (in_8(&via1[ACR]) & ~SR_OUT) | SR_EXT); + in_8(&via1[SR]); /* resets SR */ + out_8(&via2[B], in_8(&via2[B]) & ~TREQ); + (void)in_8(&via2[B]); } static inline void @@ -1270,7 +1267,7 @@ pmu_suspend(void) if (!adb_int_pending && pmu_state == idle && !req_awaiting_reply) { if (gpio_irq >= 0) disable_irq_nosync(gpio_irq); - out_8(&via[IER], CB1_INT | IER_CLR); + out_8(&via1[IER], CB1_INT | IER_CLR); spin_unlock_irqrestore(&pmu_lock, flags); break; } @@ -1294,7 +1291,7 @@ pmu_resume(void) adb_int_pending = 1; if (gpio_irq >= 0) enable_irq(gpio_irq); - out_8(&via[IER], CB1_INT | IER_SET); + out_8(&via1[IER], CB1_INT | IER_SET); spin_unlock_irqrestore(&pmu_lock, flags); pmu_poll(); } @@ -1419,20 +1416,20 @@ pmu_sr_intr(void) struct adb_request *req; int bite = 0; - if (in_8(&via[B]) & TREQ) { - printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via[B])); + if (in_8(&via2[B]) & TREQ) { + printk(KERN_ERR "PMU: spurious SR intr (%x)\n", in_8(&via2[B])); return NULL; } /* The ack may not yet be low when we get the interrupt */ - while ((in_8(&via[B]) & TACK) != 0) + while ((in_8(&via2[B]) & TACK) != 0) ; /* if reading grab the byte, and reset the interrupt */ if (pmu_state == reading || pmu_state == reading_intr) - bite = in_8(&via[SR]); + bite = in_8(&via1[SR]); /* reset TREQ and wait for TACK to go high */ - out_8(&via[B], in_8(&via[B]) | TREQ); + out_8(&via2[B], in_8(&via2[B]) | TREQ); wait_for_ack(); switch (pmu_state) { @@ -1533,17 +1530,17 @@ via_pmu_interrupt(int irq, void *arg) ++disable_poll; for (;;) { - intr = in_8(&via[IFR]) & (SR_INT | CB1_INT); + intr = in_8(&via1[IFR]) & (SR_INT | CB1_INT); if (intr == 0) break; handled = 1; if (++nloop > 1000) { printk(KERN_DEBUG "PMU: stuck in intr loop, " "intr=%x, ier=%x pmu_state=%d\n", - intr, in_8(&via[IER]), pmu_state); + intr, in_8(&via1[IER]), pmu_state); break; } - out_8(&via[IFR], intr); + out_8(&via1[IFR], intr); if (intr & CB1_INT) { adb_int_pending = 1; pmu_irq_stats[0]++; @@ -1725,29 +1722,29 @@ static u32 save_via[8]; static void save_via_state(void) { - save_via[0] = in_8(&via[ANH]); - save_via[1] = in_8(&via[DIRA]); - save_via[2] = in_8(&via[B]); - save_via[3] = in_8(&via[DIRB]); - save_via[4] = in_8(&via[PCR]); - save_via[5] = in_8(&via[ACR]); - save_via[6] = in_8(&via[T1CL]); - save_via[7] = in_8(&via[T1CH]); + save_via[0] = in_8(&via1[ANH]); + save_via[1] = in_8(&via1[DIRA]); + save_via[2] = in_8(&via1[B]); + save_via[3] = in_8(&via1[DIRB]); + save_via[4] = in_8(&via1[PCR]); + save_via[5] = in_8(&via1[ACR]); + save_via[6] = in_8(&via1[T1CL]); + save_via[7] = in_8(&via1[T1CH]); } static void restore_via_state(void) { - out_8(&via[ANH], save_via[0]); - out_8(&via[DIRA], save_via[1]); - out_8(&via[B], save_via[2]); - out_8(&via[DIRB], save_via[3]); - out_8(&via[PCR], save_via[4]); - out_8(&via[ACR], save_via[5]); - out_8(&via[T1CL], save_via[6]); - out_8(&via[T1CH], save_via[7]); - out_8(&via[IER], IER_CLR | 0x7f); /* disable all intrs */ - out_8(&via[IFR], 0x7f); /* clear IFR */ - out_8(&via[IER], IER_SET | SR_INT | CB1_INT); + out_8(&via1[ANH], save_via[0]); + out_8(&via1[DIRA], save_via[1]); + out_8(&via1[B], save_via[2]); + out_8(&via1[DIRB], save_via[3]); + out_8(&via1[PCR], save_via[4]); + out_8(&via1[ACR], save_via[5]); + out_8(&via1[T1CL], save_via[6]); + out_8(&via1[T1CH], save_via[7]); + out_8(&via1[IER], IER_CLR | 0x7f); /* disable all intrs */ + out_8(&via1[IFR], 0x7f); /* clear IFR */ + out_8(&via1[IER], IER_SET | SR_INT | CB1_INT); } #define GRACKLE_PM (1<<7) @@ -2389,33 +2386,33 @@ device_initcall(pmu_device_init); #ifdef DEBUG_SLEEP static inline void -polled_handshake(volatile unsigned char __iomem *via) +polled_handshake(void) { - via[B] &= ~TREQ; eieio(); - while ((via[B] & TACK) != 0) + via2[B] &= ~TREQ; eieio(); + while ((via2[B] & TACK) != 0) ; - via[B] |= TREQ; eieio(); - while ((via[B] & TACK) == 0) + via2[B] |= TREQ; eieio(); + while ((via2[B] & TACK) == 0) ; } static inline void -polled_send_byte(volatile unsigned char __iomem *via, int x) +polled_send_byte(int x) { - via[ACR] |= SR_OUT | SR_EXT; eieio(); - via[SR] = x; eieio(); - polled_handshake(via); + via1[ACR] |= SR_OUT | SR_EXT; eieio(); + via1[SR] = x; eieio(); + polled_handshake(); } static inline int -polled_recv_byte(volatile unsigned char __iomem *via) +polled_recv_byte(void) { int x; - via[ACR] = (via[ACR] & ~SR_OUT) | SR_EXT; eieio(); - x = via[SR]; eieio(); - polled_handshake(via); - x = via[SR]; eieio(); + via1[ACR] = (via1[ACR] & ~SR_OUT) | SR_EXT; eieio(); + x = via1[SR]; eieio(); + polled_handshake(); + x = via1[SR]; eieio(); return x; } @@ -2424,7 +2421,6 @@ pmu_polled_request(struct adb_request *req) { unsigned long flags; int i, l, c; - volatile unsigned char __iomem *v = via; req->complete = 1; c = req->data[0]; @@ -2436,21 +2432,21 @@ pmu_polled_request(struct adb_request *req) while (pmu_state != idle) pmu_poll(); - while ((via[B] & TACK) == 0) + while ((via2[B] & TACK) == 0) ; - polled_send_byte(v, c); + polled_send_byte(c); if (l < 0) { l = req->nbytes - 1; - polled_send_byte(v, l); + polled_send_byte(l); } for (i = 1; i <= l; ++i) - polled_send_byte(v, req->data[i]); + polled_send_byte(req->data[i]); l = pmu_data_len[c][1]; if (l < 0) - l = polled_recv_byte(v); + l = polled_recv_byte(); for (i = 0; i < l; ++i) - req->reply[i + req->reply_len] = polled_recv_byte(v); + req->reply[i + req->reply_len] = polled_recv_byte(); if (req->done) (*req->done)(req); -- cgit From c16a85a5aad47d712860b42f0ca989b0cb62257a Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:19 -0400 Subject: macintosh/via-pmu: Add support for m68k PowerBooks Put #ifdefs around the Open Firmware, xmon, interrupt dispatch, battery and suspend code. Add the necessary interrupt handling to support m68k PowerBooks. The pmu_kind value is available to userspace using the PMU_IOC_GET_MODEL ioctl. It is not clear yet what hardware classes are be needed to describe m68k PowerBook models, so pmu_kind is given the provisional value PMU_UNKNOWN. To find out about the hardware, user programs can use /proc/bootinfo or /proc/hardware, or send the PMU_GET_VERSION command using /dev/adb. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman --- drivers/macintosh/Kconfig | 2 +- drivers/macintosh/via-pmu.c | 101 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 91 insertions(+), 12 deletions(-) diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 97a420c11eed..9c6452b38c36 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -65,7 +65,7 @@ config ADB_CUDA If unsure say Y. config ADB_PMU - bool "Support for PMU based PowerMacs" + bool "Support for PMU based PowerMacs and PowerBooks" depends on PPC_PMAC help On PowerBooks, iBooks, and recent iMacs and Power Macintoshes, the diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 2557f3e49f18..a68e7a6f00cc 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device driver for the via-pmu on Apple Powermacs. + * Device driver for the PMU in Apple PowerBooks and PowerMacs. * * The VIA (versatile interface adapter) interfaces to the PMU, * a 6805 microprocessor core whose primary function is to control @@ -49,20 +49,26 @@ #include #include #include -#include +#include #include #include #include #include #include +#ifdef CONFIG_PPC_PMAC #include #include #include -#include +#include #include #include #include #include +#else +#include +#include +#include +#endif #include "via-pmu-event.h" @@ -97,8 +103,13 @@ static DEFINE_MUTEX(pmu_info_proc_mutex); #define ANH (15*RS) /* A-side data, no handshake */ /* Bits in B data register: both active low */ +#ifdef CONFIG_PPC_PMAC #define TACK 0x08 /* Transfer acknowledge (input) */ #define TREQ 0x10 /* Transfer request (output) */ +#else +#define TACK 0x02 +#define TREQ 0x04 +#endif /* Bits in ACR */ #define SR_CTRL 0x1c /* Shift register control bits */ @@ -140,13 +151,15 @@ static int data_index; static int data_len; static volatile int adb_int_pending; static volatile int disable_poll; -static struct device_node *vias; static int pmu_kind = PMU_UNKNOWN; static int pmu_fully_inited; static int pmu_has_adb; +#ifdef CONFIG_PPC_PMAC static volatile unsigned char __iomem *via1; static volatile unsigned char __iomem *via2; +static struct device_node *vias; static struct device_node *gpio_node; +#endif static unsigned char __iomem *gpio_reg; static int gpio_irq = 0; static int gpio_irq_enabled = -1; @@ -273,6 +286,7 @@ static char *pbook_type[] = { int __init find_via_pmu(void) { +#ifdef CONFIG_PPC_PMAC u64 taddr; const u32 *reg; @@ -355,9 +369,6 @@ int __init find_via_pmu(void) if (!init_pmu()) goto fail_init; - printk(KERN_INFO "PMU driver v%d initialized for %s, firmware: %02x\n", - PMU_DRIVER_VERSION, pbook_type[pmu_kind], pmu_version); - sys_ctrler = SYS_CTRLER_PMU; return 1; @@ -373,6 +384,30 @@ int __init find_via_pmu(void) vias = NULL; pmu_state = uninitialized; return 0; +#else + if (macintosh_config->adb_type != MAC_ADB_PB2) + return 0; + + pmu_kind = PMU_UNKNOWN; + + spin_lock_init(&pmu_lock); + + pmu_has_adb = 1; + + pmu_intr_mask = PMU_INT_PCEJECT | + PMU_INT_SNDBRT | + PMU_INT_ADB | + PMU_INT_TICK; + + pmu_state = idle; + + if (!init_pmu()) { + pmu_state = uninitialized; + return 0; + } + + return 1; +#endif /* !CONFIG_PPC_PMAC */ } #ifdef CONFIG_ADB @@ -396,13 +431,14 @@ static int pmu_init(void) */ static int __init via_pmu_start(void) { - unsigned int irq; + unsigned int __maybe_unused irq; if (pmu_state == uninitialized) return -ENODEV; batt_req.complete = 1; +#ifdef CONFIG_PPC_PMAC irq = irq_of_parse_and_map(vias, 0); if (!irq) { printk(KERN_ERR "via-pmu: can't map interrupt\n"); @@ -439,6 +475,19 @@ static int __init via_pmu_start(void) /* Enable interrupts */ out_8(&via1[IER], IER_SET | SR_INT | CB1_INT); +#else + if (request_irq(IRQ_MAC_ADB_SR, via_pmu_interrupt, IRQF_NO_SUSPEND, + "VIA-PMU-SR", NULL)) { + pr_err("%s: couldn't get SR irq\n", __func__); + return -ENODEV; + } + if (request_irq(IRQ_MAC_ADB_CL, via_pmu_interrupt, IRQF_NO_SUSPEND, + "VIA-PMU-CL", NULL)) { + pr_err("%s: couldn't get CL irq\n", __func__); + free_irq(IRQ_MAC_ADB_SR, NULL); + return -ENODEV; + } +#endif /* !CONFIG_PPC_PMAC */ pmu_fully_inited = 1; @@ -589,6 +638,10 @@ init_pmu(void) option_server_mode ? "enabled" : "disabled"); } } + + printk(KERN_INFO "PMU driver v%d initialized for %s, firmware: %02x\n", + PMU_DRIVER_VERSION, pbook_type[pmu_kind], pmu_version); + return 1; } @@ -627,6 +680,7 @@ static void pmu_set_server_mode(int server_mode) static void done_battery_state_ohare(struct adb_request* req) { +#ifdef CONFIG_PPC_PMAC /* format: * [0] : flags * 0x01 : AC indicator @@ -708,6 +762,7 @@ done_battery_state_ohare(struct adb_request* req) pmu_batteries[pmu_cur_battery].amperage = amperage; pmu_batteries[pmu_cur_battery].voltage = voltage; pmu_batteries[pmu_cur_battery].time_remaining = time; +#endif /* CONFIG_PPC_PMAC */ clear_bit(0, &async_req_locks); } @@ -1356,6 +1411,7 @@ next: } pmu_done(req); } else { +#ifdef CONFIG_XMON if (len == 4 && data[1] == 0x2c) { extern int xmon_wants_key, xmon_adb_keycode; if (xmon_wants_key) { @@ -1363,6 +1419,7 @@ next: return; } } +#endif /* CONFIG_XMON */ #ifdef CONFIG_ADB /* * XXX On the [23]400 the PMU gives us an up @@ -1530,7 +1587,25 @@ via_pmu_interrupt(int irq, void *arg) ++disable_poll; for (;;) { - intr = in_8(&via1[IFR]) & (SR_INT | CB1_INT); + /* On 68k Macs, VIA interrupts are dispatched individually. + * Unless we are polling, the relevant IRQ flag has already + * been cleared. + */ + intr = 0; + if (IS_ENABLED(CONFIG_PPC_PMAC) || !irq) { + intr = in_8(&via1[IFR]) & (SR_INT | CB1_INT); + out_8(&via1[IFR], intr); + } +#ifndef CONFIG_PPC_PMAC + switch (irq) { + case IRQ_MAC_ADB_CL: + intr = CB1_INT; + break; + case IRQ_MAC_ADB_SR: + intr = SR_INT; + break; + } +#endif if (intr == 0) break; handled = 1; @@ -1540,7 +1615,6 @@ via_pmu_interrupt(int irq, void *arg) intr, in_8(&via1[IER]), pmu_state); break; } - out_8(&via1[IFR], intr); if (intr & CB1_INT) { adb_int_pending = 1; pmu_irq_stats[0]++; @@ -1550,6 +1624,9 @@ via_pmu_interrupt(int irq, void *arg) if (req) break; } +#ifndef CONFIG_PPC_PMAC + break; +#endif } recheck: @@ -1616,7 +1693,7 @@ pmu_unlock(void) } -static irqreturn_t +static __maybe_unused irqreturn_t gpio1_interrupt(int irq, void *arg) { unsigned long flags; @@ -2250,6 +2327,7 @@ static int pmu_ioctl(struct file *filp, int error = -EINVAL; switch (cmd) { +#ifdef CONFIG_PPC_PMAC case PMU_IOC_SLEEP: if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -2259,6 +2337,7 @@ static int pmu_ioctl(struct file *filp, return put_user(0, argp); else return put_user(1, argp); +#endif #ifdef CONFIG_PMAC_BACKLIGHT_LEGACY /* Compatibility ioctl's for backlight */ -- cgit From c2f028b6a0b241d1b18d8d0ebcb13d21659dcfe7 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:19 -0400 Subject: macintosh/via-pmu: Explicitly specify CONFIG_PPC_PMAC dependencies At present, CONFIG_ADB_PMU depends on CONFIG_PPC_PMAC. When this gets relaxed to CONFIG_PPC_PMAC || CONFIG_MAC, those Kconfig symbols with implicit deps on PPC_PMAC will need explicit deps. Add them now. No functional change. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman --- drivers/macintosh/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 9c6452b38c36..26abae4c899d 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -79,7 +79,7 @@ config ADB_PMU config ADB_PMU_LED bool "Support for the Power/iBook front LED" - depends on ADB_PMU + depends on PPC_PMAC && ADB_PMU select NEW_LEDS select LEDS_CLASS help @@ -122,7 +122,7 @@ config PMAC_MEDIABAY config PMAC_BACKLIGHT bool "Backlight control for LCD screens" - depends on ADB_PMU && FB = y && (BROKEN || !PPC64) + depends on PPC_PMAC && ADB_PMU && FB = y && (BROKEN || !PPC64) select FB_BACKLIGHT help Say Y here to enable Macintosh specific extensions of the generic -- cgit From 54c990775f78113a708f24e15877f6b7bd9a1277 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:19 -0400 Subject: macintosh/via-pmu68k: Don't load driver on unsupported hardware Don't load the via-pmu68k driver on early PowerBooks. The M50753 PMU device found in those models was never supported by this driver. Attempting to load the driver usually causes a boot hang. Signed-off-by: Finn Thain Reviewed-by: Michael Schmitz Acked-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- arch/m68k/mac/misc.c | 6 ++---- drivers/macintosh/via-pmu68k.c | 4 ---- include/uapi/linux/pmu.h | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index c68054361615..7ccb799eeb57 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -478,8 +478,7 @@ void mac_poweroff(void) cuda_shutdown(); #endif #ifdef CONFIG_ADB_PMU68K - } else if (macintosh_config->adb_type == MAC_ADB_PB1 - || macintosh_config->adb_type == MAC_ADB_PB2) { + } else if (macintosh_config->adb_type == MAC_ADB_PB2) { pmu_shutdown(); #endif } @@ -520,8 +519,7 @@ void mac_reset(void) cuda_restart(); #endif #ifdef CONFIG_ADB_PMU68K - } else if (macintosh_config->adb_type == MAC_ADB_PB1 - || macintosh_config->adb_type == MAC_ADB_PB2) { + } else if (macintosh_config->adb_type == MAC_ADB_PB2) { pmu_restart(); #endif } else if (CPU_IS_030) { diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c index d545ed45e482..bec8e1837d7d 100644 --- a/drivers/macintosh/via-pmu68k.c +++ b/drivers/macintosh/via-pmu68k.c @@ -175,9 +175,6 @@ static s8 pmu_data_len[256][2] = { int __init find_via_pmu(void) { switch (macintosh_config->adb_type) { - case MAC_ADB_PB1: - pmu_kind = PMU_68K_V1; - break; case MAC_ADB_PB2: pmu_kind = PMU_68K_V2; break; @@ -785,7 +782,6 @@ pmu_enable_backlight(int on) /* first call: get current backlight value */ if (backlight_level < 0) { switch(pmu_kind) { - case PMU_68K_V1: case PMU_68K_V2: pmu_request(&req, NULL, 3, PMU_READ_NVRAM, 0x14, 0xe); while (!req.complete) diff --git a/include/uapi/linux/pmu.h b/include/uapi/linux/pmu.h index 89cb1acea93a..e128f609281a 100644 --- a/include/uapi/linux/pmu.h +++ b/include/uapi/linux/pmu.h @@ -93,7 +93,7 @@ enum { PMU_HEATHROW_BASED, /* PowerBook G3 series */ PMU_PADDINGTON_BASED, /* 1999 PowerBook G3 */ PMU_KEYLARGO_BASED, /* Core99 motherboard (PMU99) */ - PMU_68K_V1, /* 68K PMU, version 1 */ + PMU_68K_V1, /* Unused/deprecated */ PMU_68K_V2, /* 68K PMU, version 2 */ }; -- cgit From ebd722275f9cfc6752e29d2412fa3816ca05764b Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:19 -0400 Subject: macintosh/via-pmu: Replace via-pmu68k driver with via-pmu driver Now that the PowerMac via-pmu driver supports m68k PowerBooks, switch over to that driver and remove the via-pmu68k driver. Tested-by: Stan Johnson Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- arch/m68k/configs/mac_defconfig | 2 +- arch/m68k/configs/multi_defconfig | 2 +- arch/m68k/mac/config.c | 2 +- arch/m68k/mac/misc.c | 48 +-- drivers/macintosh/Kconfig | 13 +- drivers/macintosh/Makefile | 1 - drivers/macintosh/adb.c | 2 +- drivers/macintosh/via-pmu68k.c | 846 -------------------------------------- include/uapi/linux/pmu.h | 2 +- 9 files changed, 14 insertions(+), 904 deletions(-) delete mode 100644 drivers/macintosh/via-pmu68k.c diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index b52e597899eb..087ca15e32f1 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -369,7 +369,7 @@ CONFIG_TCM_PSCSI=m CONFIG_ADB=y CONFIG_ADB_MACII=y CONFIG_ADB_IOP=y -CONFIG_ADB_PMU68K=y +CONFIG_ADB_PMU=y CONFIG_ADB_CUDA=y CONFIG_INPUT_ADBHID=y CONFIG_MAC_EMUMOUSEBTN=y diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 2a84eeec5b02..3f9334084d55 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -402,7 +402,7 @@ CONFIG_TCM_PSCSI=m CONFIG_ADB=y CONFIG_ADB_MACII=y CONFIG_ADB_IOP=y -CONFIG_ADB_PMU68K=y +CONFIG_ADB_PMU=y CONFIG_ADB_CUDA=y CONFIG_INPUT_ADBHID=y CONFIG_MAC_EMUMOUSEBTN=y diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index e522307db47c..92e80cf0d8aa 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -891,7 +891,7 @@ static void __init mac_identify(void) #ifdef CONFIG_ADB_CUDA find_via_cuda(); #endif -#ifdef CONFIG_ADB_PMU68K +#ifdef CONFIG_ADB_PMU find_via_pmu(); #endif } diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 7ccb799eeb57..28090a44fa09 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -85,7 +85,7 @@ static void cuda_write_pram(int offset, __u8 data) } #endif /* CONFIG_ADB_CUDA */ -#ifdef CONFIG_ADB_PMU68K +#ifdef CONFIG_ADB_PMU static long pmu_read_time(void) { struct adb_request req; @@ -136,7 +136,7 @@ static void pmu_write_pram(int offset, __u8 data) while (!req.complete) pmu_poll(); } -#endif /* CONFIG_ADB_PMU68K */ +#endif /* CONFIG_ADB_PMU */ /* * VIA PRAM/RTC access routines @@ -367,38 +367,6 @@ static void cuda_shutdown(void) } #endif /* CONFIG_ADB_CUDA */ -#ifdef CONFIG_ADB_PMU68K - -void pmu_restart(void) -{ - struct adb_request req; - if (pmu_request(&req, NULL, - 2, PMU_SET_INTR_MASK, PMU_INT_ADB|PMU_INT_TICK) < 0) - return; - while (!req.complete) - pmu_poll(); - if (pmu_request(&req, NULL, 1, PMU_RESET) < 0) - return; - while (!req.complete) - pmu_poll(); -} - -void pmu_shutdown(void) -{ - struct adb_request req; - if (pmu_request(&req, NULL, - 2, PMU_SET_INTR_MASK, PMU_INT_ADB|PMU_INT_TICK) < 0) - return; - while (!req.complete) - pmu_poll(); - if (pmu_request(&req, NULL, 5, PMU_SHUTDOWN, 'M', 'A', 'T', 'T') < 0) - return; - while (!req.complete) - pmu_poll(); -} - -#endif - /* *------------------------------------------------------------------- * Below this point are the generic routines; they'll dispatch to the @@ -423,7 +391,7 @@ void mac_pram_read(int offset, __u8 *buffer, int len) func = cuda_read_pram; break; #endif -#ifdef CONFIG_ADB_PMU68K +#ifdef CONFIG_ADB_PMU case MAC_ADB_PB2: func = pmu_read_pram; break; @@ -453,7 +421,7 @@ void mac_pram_write(int offset, __u8 *buffer, int len) func = cuda_write_pram; break; #endif -#ifdef CONFIG_ADB_PMU68K +#ifdef CONFIG_ADB_PMU case MAC_ADB_PB2: func = pmu_write_pram; break; @@ -477,7 +445,7 @@ void mac_poweroff(void) macintosh_config->adb_type == MAC_ADB_CUDA) { cuda_shutdown(); #endif -#ifdef CONFIG_ADB_PMU68K +#ifdef CONFIG_ADB_PMU } else if (macintosh_config->adb_type == MAC_ADB_PB2) { pmu_shutdown(); #endif @@ -518,7 +486,7 @@ void mac_reset(void) macintosh_config->adb_type == MAC_ADB_CUDA) { cuda_restart(); #endif -#ifdef CONFIG_ADB_PMU68K +#ifdef CONFIG_ADB_PMU } else if (macintosh_config->adb_type == MAC_ADB_PB2) { pmu_restart(); #endif @@ -670,7 +638,7 @@ int mac_hwclk(int op, struct rtc_time *t) now = cuda_read_time(); break; #endif -#ifdef CONFIG_ADB_PMU68K +#ifdef CONFIG_ADB_PMU case MAC_ADB_PB2: now = pmu_read_time(); break; @@ -706,7 +674,7 @@ int mac_hwclk(int op, struct rtc_time *t) cuda_write_time(now); break; #endif -#ifdef CONFIG_ADB_PMU68K +#ifdef CONFIG_ADB_PMU case MAC_ADB_PB2: pmu_write_time(now); break; diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 26abae4c899d..47c350cdfb12 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -39,17 +39,6 @@ config ADB_IOP to enable direct support for it, say 'Y' here. -config ADB_PMU68K - bool "Include PMU (Powerbook) ADB driver" - depends on ADB && MAC - help - Say Y here if want your kernel to support the m68k based Powerbooks. - This includes the PowerBook 140, PowerBook 145, PowerBook 150, - PowerBook 160, PowerBook 165, PowerBook 165c, PowerBook 170, - PowerBook 180, PowerBook, 180c, PowerBook 190cs, PowerBook 520, - PowerBook Duo 210, PowerBook Duo 230, PowerBook Duo 250, - PowerBook Duo 270c, PowerBook Duo 280 and PowerBook Duo 280c. - # we want to change this to something like CONFIG_SYSCTRL_CUDA/PMU config ADB_CUDA bool "Support for Cuda/Egret based Macs and PowerMacs" @@ -66,7 +55,7 @@ config ADB_CUDA config ADB_PMU bool "Support for PMU based PowerMacs and PowerBooks" - depends on PPC_PMAC + depends on PPC_PMAC || MAC help On PowerBooks, iBooks, and recent iMacs and Power Macintoshes, the PMU is an embedded microprocessor whose primary function is to diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile index ee803638e595..49819b1b6f20 100644 --- a/drivers/macintosh/Makefile +++ b/drivers/macintosh/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_PMAC_SMU) += smu.o obj-$(CONFIG_ADB) += adb.o obj-$(CONFIG_ADB_MACII) += via-macii.o obj-$(CONFIG_ADB_IOP) += adb-iop.o -obj-$(CONFIG_ADB_PMU68K) += via-pmu68k.o obj-$(CONFIG_ADB_MACIO) += macio-adb.o obj-$(CONFIG_THERM_WINDTUNNEL) += therm_windtunnel.o diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 4c8097e0e6fe..76e98f0f7a3e 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -65,7 +65,7 @@ static struct adb_driver *adb_driver_list[] = { #ifdef CONFIG_ADB_IOP &adb_iop_driver, #endif -#if defined(CONFIG_ADB_PMU) || defined(CONFIG_ADB_PMU68K) +#ifdef CONFIG_ADB_PMU &via_pmu_driver, #endif #ifdef CONFIG_ADB_MACIO diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c deleted file mode 100644 index bec8e1837d7d..000000000000 --- a/drivers/macintosh/via-pmu68k.c +++ /dev/null @@ -1,846 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Device driver for the PMU on 68K-based Apple PowerBooks - * - * The VIA (versatile interface adapter) interfaces to the PMU, - * a 6805 microprocessor core whose primary function is to control - * battery charging and system power on the PowerBooks. - * The PMU also controls the ADB (Apple Desktop Bus) which connects - * to the keyboard and mouse, as well as the non-volatile RAM - * and the RTC (real time clock) chip. - * - * Adapted for 68K PMU by Joshua M. Thompson - * - * Based largely on the PowerMac PMU code by Paul Mackerras and - * Fabio Riccardi. - * - * Also based on the PMU driver from MkLinux by Apple Computer, Inc. - * and the Open Software Foundation, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -/* Misc minor number allocated for /dev/pmu */ -#define PMU_MINOR 154 - -/* VIA registers - spaced 0x200 bytes apart */ -#define RS 0x200 /* skip between registers */ -#define B 0 /* B-side data */ -#define A RS /* A-side data */ -#define DIRB (2*RS) /* B-side direction (1=output) */ -#define DIRA (3*RS) /* A-side direction (1=output) */ -#define T1CL (4*RS) /* Timer 1 ctr/latch (low 8 bits) */ -#define T1CH (5*RS) /* Timer 1 counter (high 8 bits) */ -#define T1LL (6*RS) /* Timer 1 latch (low 8 bits) */ -#define T1LH (7*RS) /* Timer 1 latch (high 8 bits) */ -#define T2CL (8*RS) /* Timer 2 ctr/latch (low 8 bits) */ -#define T2CH (9*RS) /* Timer 2 counter (high 8 bits) */ -#define SR (10*RS) /* Shift register */ -#define ACR (11*RS) /* Auxiliary control register */ -#define PCR (12*RS) /* Peripheral control register */ -#define IFR (13*RS) /* Interrupt flag register */ -#define IER (14*RS) /* Interrupt enable register */ -#define ANH (15*RS) /* A-side data, no handshake */ - -/* Bits in B data register: both active low */ -#define TACK 0x02 /* Transfer acknowledge (input) */ -#define TREQ 0x04 /* Transfer request (output) */ - -/* Bits in ACR */ -#define SR_CTRL 0x1c /* Shift register control bits */ -#define SR_EXT 0x0c /* Shift on external clock */ -#define SR_OUT 0x10 /* Shift out if 1 */ - -/* Bits in IFR and IER */ -#define SR_INT 0x04 /* Shift register full/empty */ -#define CB1_INT 0x10 /* transition on CB1 input */ - -static enum pmu_state { - idle, - sending, - intack, - reading, - reading_intr, -} pmu_state; - -static struct adb_request *current_req; -static struct adb_request *last_req; -static struct adb_request *req_awaiting_reply; -static unsigned char interrupt_data[32]; -static unsigned char *reply_ptr; -static int data_index; -static int data_len; -static int adb_int_pending; -static int pmu_adb_flags; -static int adb_dev_map; -static struct adb_request bright_req_1, bright_req_2, bright_req_3; -static int pmu_kind = PMU_UNKNOWN; -static int pmu_fully_inited; - -int asleep; - -static int pmu_probe(void); -static int pmu_init(void); -static void pmu_start(void); -static irqreturn_t pmu_interrupt(int irq, void *arg); -static int pmu_send_request(struct adb_request *req, int sync); -static int pmu_autopoll(int devs); -void pmu_poll(void); -static int pmu_reset_bus(void); - -static int init_pmu(void); -static void pmu_start(void); -static void send_byte(int x); -static void recv_byte(void); -static void pmu_done(struct adb_request *req); -static void pmu_handle_data(unsigned char *data, int len); -static void set_volume(int level); -static void pmu_enable_backlight(int on); -static void pmu_set_brightness(int level); - -struct adb_driver via_pmu_driver = { - .name = "68K PMU", - .probe = pmu_probe, - .init = pmu_init, - .send_request = pmu_send_request, - .autopoll = pmu_autopoll, - .poll = pmu_poll, - .reset_bus = pmu_reset_bus, -}; - -/* - * This table indicates for each PMU opcode: - * - the number of data bytes to be sent with the command, or -1 - * if a length byte should be sent, - * - the number of response bytes which the PMU will return, or - * -1 if it will send a length byte. - */ -static s8 pmu_data_len[256][2] = { -/* 0 1 2 3 4 5 6 7 */ -/*00*/ {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*08*/ {-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*10*/ { 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*18*/ { 0, 1},{ 0, 1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{ 0, 0}, -/*20*/ {-1, 0},{ 0, 0},{ 2, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*28*/ { 0,-1},{ 0,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{ 0,-1}, -/*30*/ { 4, 0},{20, 0},{-1, 0},{ 3, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*38*/ { 0, 4},{ 0,20},{ 2,-1},{ 2, 1},{ 3,-1},{-1,-1},{-1,-1},{ 4, 0}, -/*40*/ { 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*48*/ { 0, 1},{ 0, 1},{-1,-1},{ 1, 0},{ 1, 0},{-1,-1},{-1,-1},{-1,-1}, -/*50*/ { 1, 0},{ 0, 0},{ 2, 0},{ 2, 0},{-1, 0},{ 1, 0},{ 3, 0},{ 1, 0}, -/*58*/ { 0, 1},{ 1, 0},{ 0, 2},{ 0, 2},{ 0,-1},{-1,-1},{-1,-1},{-1,-1}, -/*60*/ { 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*68*/ { 0, 3},{ 0, 3},{ 0, 2},{ 0, 8},{ 0,-1},{ 0,-1},{-1,-1},{-1,-1}, -/*70*/ { 1, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*78*/ { 0,-1},{ 0,-1},{-1,-1},{-1,-1},{-1,-1},{ 5, 1},{ 4, 1},{ 4, 1}, -/*80*/ { 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*88*/ { 0, 5},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*90*/ { 1, 0},{ 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*98*/ { 0, 1},{ 0, 1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*a0*/ { 2, 0},{ 2, 0},{ 2, 0},{ 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0}, -/*a8*/ { 1, 1},{ 1, 0},{ 3, 0},{ 2, 0},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*b0*/ {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*b8*/ {-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*c0*/ {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*c8*/ {-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -/*d0*/ { 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*d8*/ { 1, 1},{ 1, 1},{-1,-1},{-1,-1},{ 0, 1},{ 0,-1},{-1,-1},{-1,-1}, -/*e0*/ {-1, 0},{ 4, 0},{ 0, 1},{-1, 0},{-1, 0},{ 4, 0},{-1, 0},{-1, 0}, -/*e8*/ { 3,-1},{-1,-1},{ 0, 1},{-1,-1},{ 0,-1},{-1,-1},{-1,-1},{ 0, 0}, -/*f0*/ {-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0}, -/*f8*/ {-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1}, -}; - -int __init find_via_pmu(void) -{ - switch (macintosh_config->adb_type) { - case MAC_ADB_PB2: - pmu_kind = PMU_68K_V2; - break; - default: - pmu_kind = PMU_UNKNOWN; - return -ENODEV; - } - - pmu_state = idle; - - if (!init_pmu()) - goto fail_init; - - pr_info("adb: PMU 68K driver v0.5 for Unified ADB\n"); - - return 1; - -fail_init: - pmu_kind = PMU_UNKNOWN; - return 0; -} - -static int pmu_probe(void) -{ - if (pmu_kind == PMU_UNKNOWN) - return -ENODEV; - return 0; -} - -static int pmu_init(void) -{ - if (pmu_kind == PMU_UNKNOWN) - return -ENODEV; - return 0; -} - -static int __init via_pmu_start(void) -{ - if (pmu_kind == PMU_UNKNOWN) - return -ENODEV; - - if (request_irq(IRQ_MAC_ADB_SR, pmu_interrupt, 0, "PMU_SR", - pmu_interrupt)) { - pr_err("%s: can't get SR irq\n", __func__); - return -ENODEV; - } - if (request_irq(IRQ_MAC_ADB_CL, pmu_interrupt, 0, "PMU_CL", - pmu_interrupt)) { - pr_err("%s: can't get CL irq\n", __func__); - free_irq(IRQ_MAC_ADB_SR, pmu_interrupt); - return -ENODEV; - } - - pmu_fully_inited = 1; - - /* Enable backlight */ - pmu_enable_backlight(1); - - return 0; -} - -arch_initcall(via_pmu_start); - -static int __init init_pmu(void) -{ - int timeout; - volatile struct adb_request req; - - via2[B] |= TREQ; /* negate TREQ */ - via2[DIRB] = (via2[DIRB] | TREQ) & ~TACK; /* TACK in, TREQ out */ - - pmu_request((struct adb_request *) &req, NULL, 2, PMU_SET_INTR_MASK, PMU_INT_ADB); - timeout = 100000; - while (!req.complete) { - if (--timeout < 0) { - printk(KERN_ERR "pmu_init: no response from PMU\n"); - return -EAGAIN; - } - udelay(10); - pmu_poll(); - } - - /* ack all pending interrupts */ - timeout = 100000; - interrupt_data[0] = 1; - while (interrupt_data[0] || pmu_state != idle) { - if (--timeout < 0) { - printk(KERN_ERR "pmu_init: timed out acking intrs\n"); - return -EAGAIN; - } - if (pmu_state == idle) { - adb_int_pending = 1; - pmu_interrupt(0, NULL); - } - pmu_poll(); - udelay(10); - } - - pmu_request((struct adb_request *) &req, NULL, 2, PMU_SET_INTR_MASK, - PMU_INT_ADB_AUTO|PMU_INT_SNDBRT|PMU_INT_ADB); - timeout = 100000; - while (!req.complete) { - if (--timeout < 0) { - printk(KERN_ERR "pmu_init: no response from PMU\n"); - return -EAGAIN; - } - udelay(10); - pmu_poll(); - } - - bright_req_1.complete = 1; - bright_req_2.complete = 1; - bright_req_3.complete = 1; - - return 1; -} - -int -pmu_get_model(void) -{ - return pmu_kind; -} - -/* Send an ADB command */ -static int -pmu_send_request(struct adb_request *req, int sync) -{ - int i, ret; - - if (!pmu_fully_inited) - { - req->complete = 1; - return -ENXIO; - } - - ret = -EINVAL; - - switch (req->data[0]) { - case PMU_PACKET: - for (i = 0; i < req->nbytes - 1; ++i) - req->data[i] = req->data[i+1]; - --req->nbytes; - if (pmu_data_len[req->data[0]][1] != 0) { - req->reply[0] = ADB_RET_OK; - req->reply_len = 1; - } else - req->reply_len = 0; - ret = pmu_queue_request(req); - break; - case CUDA_PACKET: - switch (req->data[1]) { - case CUDA_GET_TIME: - if (req->nbytes != 2) - break; - req->data[0] = PMU_READ_RTC; - req->nbytes = 1; - req->reply_len = 3; - req->reply[0] = CUDA_PACKET; - req->reply[1] = 0; - req->reply[2] = CUDA_GET_TIME; - ret = pmu_queue_request(req); - break; - case CUDA_SET_TIME: - if (req->nbytes != 6) - break; - req->data[0] = PMU_SET_RTC; - req->nbytes = 5; - for (i = 1; i <= 4; ++i) - req->data[i] = req->data[i+1]; - req->reply_len = 3; - req->reply[0] = CUDA_PACKET; - req->reply[1] = 0; - req->reply[2] = CUDA_SET_TIME; - ret = pmu_queue_request(req); - break; - case CUDA_GET_PRAM: - if (req->nbytes != 4) - break; - req->data[0] = PMU_READ_NVRAM; - req->data[1] = req->data[2]; - req->data[2] = req->data[3]; - req->nbytes = 3; - req->reply_len = 3; - req->reply[0] = CUDA_PACKET; - req->reply[1] = 0; - req->reply[2] = CUDA_GET_PRAM; - ret = pmu_queue_request(req); - break; - case CUDA_SET_PRAM: - if (req->nbytes != 5) - break; - req->data[0] = PMU_WRITE_NVRAM; - req->data[1] = req->data[2]; - req->data[2] = req->data[3]; - req->data[3] = req->data[4]; - req->nbytes = 4; - req->reply_len = 3; - req->reply[0] = CUDA_PACKET; - req->reply[1] = 0; - req->reply[2] = CUDA_SET_PRAM; - ret = pmu_queue_request(req); - break; - } - break; - case ADB_PACKET: - for (i = req->nbytes - 1; i > 1; --i) - req->data[i+2] = req->data[i]; - req->data[3] = req->nbytes - 2; - req->data[2] = pmu_adb_flags; - /*req->data[1] = req->data[1];*/ - req->data[0] = PMU_ADB_CMD; - req->nbytes += 2; - req->reply_expected = 1; - req->reply_len = 0; - ret = pmu_queue_request(req); - break; - } - if (ret) - { - req->complete = 1; - return ret; - } - - if (sync) { - while (!req->complete) - pmu_poll(); - } - - return 0; -} - -/* Enable/disable autopolling */ -static int -pmu_autopoll(int devs) -{ - struct adb_request req; - - if (!pmu_fully_inited) return -ENXIO; - - if (devs) { - adb_dev_map = devs; - pmu_request(&req, NULL, 5, PMU_ADB_CMD, 0, 0x86, - adb_dev_map >> 8, adb_dev_map); - pmu_adb_flags = 2; - } else { - pmu_request(&req, NULL, 1, PMU_ADB_POLL_OFF); - pmu_adb_flags = 0; - } - while (!req.complete) - pmu_poll(); - return 0; -} - -/* Reset the ADB bus */ -static int -pmu_reset_bus(void) -{ - struct adb_request req; - long timeout; - int save_autopoll = adb_dev_map; - - if (!pmu_fully_inited) return -ENXIO; - - /* anyone got a better idea?? */ - pmu_autopoll(0); - - req.nbytes = 5; - req.done = NULL; - req.data[0] = PMU_ADB_CMD; - req.data[1] = 0; - req.data[2] = 3; /* ADB_BUSRESET ??? */ - req.data[3] = 0; - req.data[4] = 0; - req.reply_len = 0; - req.reply_expected = 1; - if (pmu_queue_request(&req) != 0) - { - printk(KERN_ERR "pmu_adb_reset_bus: pmu_queue_request failed\n"); - return -EIO; - } - while (!req.complete) - pmu_poll(); - timeout = 100000; - while (!req.complete) { - if (--timeout < 0) { - printk(KERN_ERR "pmu_adb_reset_bus (reset): no response from PMU\n"); - return -EIO; - } - udelay(10); - pmu_poll(); - } - - if (save_autopoll != 0) - pmu_autopoll(save_autopoll); - - return 0; -} - -/* Construct and send a pmu request */ -int -pmu_request(struct adb_request *req, void (*done)(struct adb_request *), - int nbytes, ...) -{ - va_list list; - int i; - - if (nbytes < 0 || nbytes > 32) { - printk(KERN_ERR "pmu_request: bad nbytes (%d)\n", nbytes); - req->complete = 1; - return -EINVAL; - } - req->nbytes = nbytes; - req->done = done; - va_start(list, nbytes); - for (i = 0; i < nbytes; ++i) - req->data[i] = va_arg(list, int); - va_end(list); - if (pmu_data_len[req->data[0]][1] != 0) { - req->reply[0] = ADB_RET_OK; - req->reply_len = 1; - } else - req->reply_len = 0; - req->reply_expected = 0; - return pmu_queue_request(req); -} - -int -pmu_queue_request(struct adb_request *req) -{ - unsigned long flags; - int nsend; - - if (req->nbytes <= 0) { - req->complete = 1; - return 0; - } - nsend = pmu_data_len[req->data[0]][0]; - if (nsend >= 0 && req->nbytes != nsend + 1) { - req->complete = 1; - return -EINVAL; - } - - req->next = NULL; - req->sent = 0; - req->complete = 0; - local_irq_save(flags); - - if (current_req != 0) { - last_req->next = req; - last_req = req; - } else { - current_req = req; - last_req = req; - if (pmu_state == idle) - pmu_start(); - } - - local_irq_restore(flags); - return 0; -} - -static void -send_byte(int x) -{ - via1[ACR] |= SR_CTRL; - via1[SR] = x; - via2[B] &= ~TREQ; /* assert TREQ */ -} - -static void -recv_byte(void) -{ - char c; - - via1[ACR] = (via1[ACR] | SR_EXT) & ~SR_OUT; - c = via1[SR]; /* resets SR */ - via2[B] &= ~TREQ; -} - -static void -pmu_start(void) -{ - unsigned long flags; - struct adb_request *req; - - /* assert pmu_state == idle */ - /* get the packet to send */ - local_irq_save(flags); - req = current_req; - if (req == 0 || pmu_state != idle - || (req->reply_expected && req_awaiting_reply)) - goto out; - - pmu_state = sending; - data_index = 1; - data_len = pmu_data_len[req->data[0]][0]; - - /* set the shift register to shift out and send a byte */ - send_byte(req->data[0]); - -out: - local_irq_restore(flags); -} - -void -pmu_poll(void) -{ - unsigned long flags; - - local_irq_save(flags); - if (via1[IFR] & SR_INT) { - via1[IFR] = SR_INT; - pmu_interrupt(IRQ_MAC_ADB_SR, NULL); - } - if (via1[IFR] & CB1_INT) { - via1[IFR] = CB1_INT; - pmu_interrupt(IRQ_MAC_ADB_CL, NULL); - } - local_irq_restore(flags); -} - -static irqreturn_t -pmu_interrupt(int irq, void *dev_id) -{ - struct adb_request *req; - int timeout, bite = 0; /* to prevent compiler warning */ - -#if 0 - printk("pmu_interrupt: irq %d state %d acr %02X, b %02X data_index %d/%d adb_int_pending %d\n", - irq, pmu_state, (uint) via1[ACR], (uint) via2[B], data_index, data_len, adb_int_pending); -#endif - - if (irq == IRQ_MAC_ADB_CL) { /* CB1 interrupt */ - adb_int_pending = 1; - } else if (irq == IRQ_MAC_ADB_SR) { /* SR interrupt */ - if (via2[B] & TACK) { - printk(KERN_DEBUG "PMU: SR_INT but ack still high! (%x)\n", via2[B]); - } - - /* if reading grab the byte */ - if ((via1[ACR] & SR_OUT) == 0) bite = via1[SR]; - - /* reset TREQ and wait for TACK to go high */ - via2[B] |= TREQ; - timeout = 3200; - while (!(via2[B] & TACK)) { - if (--timeout < 0) { - printk(KERN_ERR "PMU not responding (!ack)\n"); - goto finish; - } - udelay(10); - } - - switch (pmu_state) { - case sending: - req = current_req; - if (data_len < 0) { - data_len = req->nbytes - 1; - send_byte(data_len); - break; - } - if (data_index <= data_len) { - send_byte(req->data[data_index++]); - break; - } - req->sent = 1; - data_len = pmu_data_len[req->data[0]][1]; - if (data_len == 0) { - pmu_state = idle; - current_req = req->next; - if (req->reply_expected) - req_awaiting_reply = req; - else - pmu_done(req); - } else { - pmu_state = reading; - data_index = 0; - reply_ptr = req->reply + req->reply_len; - recv_byte(); - } - break; - - case intack: - data_index = 0; - data_len = -1; - pmu_state = reading_intr; - reply_ptr = interrupt_data; - recv_byte(); - break; - - case reading: - case reading_intr: - if (data_len == -1) { - data_len = bite; - if (bite > 32) - printk(KERN_ERR "PMU: bad reply len %d\n", - bite); - } else { - reply_ptr[data_index++] = bite; - } - if (data_index < data_len) { - recv_byte(); - break; - } - - if (pmu_state == reading_intr) { - pmu_handle_data(interrupt_data, data_index); - } else { - req = current_req; - current_req = req->next; - req->reply_len += data_index; - pmu_done(req); - } - pmu_state = idle; - - break; - - default: - printk(KERN_ERR "pmu_interrupt: unknown state %d?\n", - pmu_state); - } - } -finish: - if (pmu_state == idle) { - if (adb_int_pending) { - pmu_state = intack; - send_byte(PMU_INT_ACK); - adb_int_pending = 0; - } else if (current_req) { - pmu_start(); - } - } - -#if 0 - printk("pmu_interrupt: exit state %d acr %02X, b %02X data_index %d/%d adb_int_pending %d\n", - pmu_state, (uint) via1[ACR], (uint) via2[B], data_index, data_len, adb_int_pending); -#endif - return IRQ_HANDLED; -} - -static void -pmu_done(struct adb_request *req) -{ - req->complete = 1; - if (req->done) - (*req->done)(req); -} - -/* Interrupt data could be the result data from an ADB cmd */ -static void -pmu_handle_data(unsigned char *data, int len) -{ - static int show_pmu_ints = 1; - - asleep = 0; - if (len < 1) { - adb_int_pending = 0; - return; - } - if (data[0] & PMU_INT_ADB) { - if ((data[0] & PMU_INT_ADB_AUTO) == 0) { - struct adb_request *req = req_awaiting_reply; - if (req == 0) { - printk(KERN_ERR "PMU: extra ADB reply\n"); - return; - } - req_awaiting_reply = NULL; - if (len <= 2) - req->reply_len = 0; - else { - memcpy(req->reply, data + 1, len - 1); - req->reply_len = len - 1; - } - pmu_done(req); - } else { - adb_input(data+1, len-1, 1); - } - } else { - if (data[0] == 0x08 && len == 3) { - /* sound/brightness buttons pressed */ - pmu_set_brightness(data[1] >> 3); - set_volume(data[2]); - } else if (show_pmu_ints - && !(data[0] == PMU_INT_TICK && len == 1)) { - int i; - printk(KERN_DEBUG "pmu intr"); - for (i = 0; i < len; ++i) - printk(" %.2x", data[i]); - printk("\n"); - } - } -} - -static int backlight_level = -1; -static int backlight_enabled = 0; - -#define LEVEL_TO_BRIGHT(lev) ((lev) < 1? 0x7f: 0x4a - ((lev) << 1)) - -static void -pmu_enable_backlight(int on) -{ - struct adb_request req; - - if (on) { - /* first call: get current backlight value */ - if (backlight_level < 0) { - switch(pmu_kind) { - case PMU_68K_V2: - pmu_request(&req, NULL, 3, PMU_READ_NVRAM, 0x14, 0xe); - while (!req.complete) - pmu_poll(); - printk(KERN_DEBUG "pmu: nvram returned bright: %d\n", (int)req.reply[1]); - backlight_level = req.reply[1]; - break; - default: - backlight_enabled = 0; - return; - } - } - pmu_request(&req, NULL, 2, PMU_BACKLIGHT_BRIGHT, - LEVEL_TO_BRIGHT(backlight_level)); - while (!req.complete) - pmu_poll(); - } - pmu_request(&req, NULL, 2, PMU_POWER_CTRL, - PMU_POW_BACKLIGHT | (on ? PMU_POW_ON : PMU_POW_OFF)); - while (!req.complete) - pmu_poll(); - backlight_enabled = on; -} - -static void -pmu_set_brightness(int level) -{ - int bright; - - backlight_level = level; - bright = LEVEL_TO_BRIGHT(level); - if (!backlight_enabled) - return; - if (bright_req_1.complete) - pmu_request(&bright_req_1, NULL, 2, PMU_BACKLIGHT_BRIGHT, - bright); - if (bright_req_2.complete) - pmu_request(&bright_req_2, NULL, 2, PMU_POWER_CTRL, - PMU_POW_BACKLIGHT | (bright < 0x7f ? PMU_POW_ON : PMU_POW_OFF)); -} - -void -pmu_enable_irled(int on) -{ - struct adb_request req; - - pmu_request(&req, NULL, 2, PMU_POWER_CTRL, PMU_POW_IRLED | - (on ? PMU_POW_ON : PMU_POW_OFF)); - while (!req.complete) - pmu_poll(); -} - -static void -set_volume(int level) -{ -} - -int -pmu_present(void) -{ - return (pmu_kind != PMU_UNKNOWN); -} diff --git a/include/uapi/linux/pmu.h b/include/uapi/linux/pmu.h index e128f609281a..97256f90e6df 100644 --- a/include/uapi/linux/pmu.h +++ b/include/uapi/linux/pmu.h @@ -94,7 +94,7 @@ enum { PMU_PADDINGTON_BASED, /* 1999 PowerBook G3 */ PMU_KEYLARGO_BASED, /* Core99 motherboard (PMU99) */ PMU_68K_V1, /* Unused/deprecated */ - PMU_68K_V2, /* 68K PMU, version 2 */ + PMU_68K_V2, /* Unused/deprecated */ }; /* PMU PMU_POWER_EVENTS commands */ -- cgit From b5c7cccaacdfa8d707da3e65970b15070f56df5e Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:19 -0400 Subject: macintosh/via-pmu: Clean up interrupt statistics Replace an open-coded ffs() with the function call. Simplify an if-else cascade using a switch statement. Correct a typo and an indentation issue. Tested-by: Stan Johnson Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- drivers/macintosh/via-pmu.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index a68e7a6f00cc..3da5d40309d4 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -1355,7 +1355,8 @@ pmu_resume(void) static void pmu_handle_data(unsigned char *data, int len) { - unsigned char ints, pirq; + unsigned char ints; + int idx; int i = 0; asleep = 0; @@ -1377,25 +1378,24 @@ pmu_handle_data(unsigned char *data, int len) ints &= ~(PMU_INT_ADB_AUTO | PMU_INT_AUTO_SRQ_POLL); next: - if (ints == 0) { if (i > pmu_irq_stats[10]) pmu_irq_stats[10] = i; return; } - - for (pirq = 0; pirq < 8; pirq++) - if (ints & (1 << pirq)) - break; - pmu_irq_stats[pirq]++; i++; - ints &= ~(1 << pirq); + + idx = ffs(ints) - 1; + ints &= ~BIT(idx); + + pmu_irq_stats[idx]++; /* Note: for some reason, we get an interrupt with len=1, * data[0]==0 after each normal ADB interrupt, at least * on the Pismo. Still investigating... --BenH */ - if ((1 << pirq) & PMU_INT_ADB) { + switch (BIT(idx)) { + case PMU_INT_ADB: if ((data[0] & PMU_INT_ADB_AUTO) == 0) { struct adb_request *req = req_awaiting_reply; if (!req) { @@ -1433,25 +1433,28 @@ next: adb_input(data+1, len-1, 1); #endif /* CONFIG_ADB */ } - } + break; + /* Sound/brightness button pressed */ - else if ((1 << pirq) & PMU_INT_SNDBRT) { + case PMU_INT_SNDBRT: #ifdef CONFIG_PMAC_BACKLIGHT if (len == 3) pmac_backlight_set_legacy_brightness_pmu(data[1] >> 4); #endif - } + break; + /* Tick interrupt */ - else if ((1 << pirq) & PMU_INT_TICK) { - /* Environement or tick interrupt, query batteries */ + case PMU_INT_TICK: + /* Environment or tick interrupt, query batteries */ if (pmu_battery_count) { if ((--query_batt_timer) == 0) { query_battery_state(); query_batt_timer = BATTERY_POLLING_COUNT; } } - } - else if ((1 << pirq) & PMU_INT_ENVIRONMENT) { + break; + + case PMU_INT_ENVIRONMENT: if (pmu_battery_count) query_battery_state(); pmu_pass_intr(data, len); @@ -1461,7 +1464,9 @@ next: via_pmu_event(PMU_EVT_POWER, !!(data[1]&8)); via_pmu_event(PMU_EVT_LID, data[1]&1); } - } else { + break; + + default: pmu_pass_intr(data, len); } goto next; -- cgit From 6edc22fc9cbb80fdf929df3e5b912d8c42e0ff3a Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Mon, 2 Jul 2018 04:21:19 -0400 Subject: macintosh/via-pmu: Disambiguate interrupt statistics Some of the event counters are overloaded which makes it very difficult to interpret their values. Counter 0 is supposed to report CB1 interrupts but it can also count PMU_INT_WAITING_CHARGER events. Counter 1 is supposed to report GPIO interrupts but it can also count other events (depending upon the value of the PMU_INT_ADB bit). Disambiguate these statistics with dedicated counters for GPIO and CB1 interrupts. Comments in the MkLinux source code say that the type 0 and type 1 interrupts are model-specific. Label them as "unknown". This change to the contents of /proc/pmu/interrupts is by necessity visible in userland. However, packages which interact with the PMU (that is, pbbuttonsd, pmac-utils and pmud) don't open this file. AFAIK, user software has no need to poll these counters. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman --- drivers/macintosh/via-pmu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 3da5d40309d4..d72c450aebe5 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -172,7 +172,9 @@ static int drop_interrupts; static int option_lid_wakeup = 1; #endif /* CONFIG_SUSPEND && CONFIG_PPC32 */ static unsigned long async_req_locks; -static unsigned int pmu_irq_stats[11]; + +#define NUM_IRQ_STATS 13 +static unsigned int pmu_irq_stats[NUM_IRQ_STATS]; static struct proc_dir_entry *proc_pmu_root; static struct proc_dir_entry *proc_pmu_info; @@ -873,9 +875,9 @@ static int pmu_info_proc_show(struct seq_file *m, void *v) static int pmu_irqstats_proc_show(struct seq_file *m, void *v) { int i; - static const char *irq_names[] = { - "Total CB1 triggered events", - "Total GPIO1 triggered events", + static const char *irq_names[NUM_IRQ_STATS] = { + "Unknown interrupt (type 0)", + "Unknown interrupt (type 1)", "PC-Card eject button", "Sound/Brightness button", "ADB message", @@ -884,10 +886,12 @@ static int pmu_irqstats_proc_show(struct seq_file *m, void *v) "Tick timer", "Ghost interrupt (zero len)", "Empty interrupt (empty mask)", - "Max irqs in a row" + "Max irqs in a row", + "Total CB1 triggered events", + "Total GPIO1 triggered events", }; - for (i=0; i<11; i++) { + for (i = 0; i < NUM_IRQ_STATS; i++) { seq_printf(m, " %2u: %10u (%s)\n", i, pmu_irq_stats[i], irq_names[i]); } @@ -1622,7 +1626,7 @@ via_pmu_interrupt(int irq, void *arg) } if (intr & CB1_INT) { adb_int_pending = 1; - pmu_irq_stats[0]++; + pmu_irq_stats[11]++; } if (intr & SR_INT) { req = pmu_sr_intr(); @@ -1709,7 +1713,7 @@ gpio1_interrupt(int irq, void *arg) disable_irq_nosync(gpio_irq); gpio_irq_enabled = 0; } - pmu_irq_stats[1]++; + pmu_irq_stats[12]++; adb_int_pending = 1; spin_unlock_irqrestore(&pmu_lock, flags); via_pmu_interrupt(0, NULL); -- cgit From 9c7b185ab2fe313b4426bf55da3624bef71f342b Mon Sep 17 00:00:00 2001 From: Akshay Adiga Date: Thu, 5 Jul 2018 17:10:21 +0530 Subject: powernv/cpuidle: Parse dt idle properties into global structure Device-tree parsing happens twice, once while deciding idle state to be used for hotplug and once during cpuidle init. Hence, parsing the device tree and caching it will reduce code duplication. Parsing code has been moved to pnv_parse_cpuidle_dt() from pnv_probe_idle_states(). In addition to the properties in the device tree the number of available states is also required. Signed-off-by: Akshay Adiga Reviewed-by: Nicholas Piggin Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 11 ++ arch/powerpc/platforms/powernv/idle.c | 216 ++++++++++++++++++++++------------ 2 files changed, 149 insertions(+), 78 deletions(-) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index e210a83eb196..574b0ce1d671 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -79,6 +79,17 @@ struct stop_sprs { u64 mmcra; }; +#define PNV_IDLE_NAME_LEN 16 +struct pnv_idle_states_t { + char name[PNV_IDLE_NAME_LEN]; + u32 latency_ns; + u32 residency_ns; + u64 psscr_val; + u64 psscr_mask; + u32 flags; + bool valid; +}; + extern u32 pnv_fastsleep_workaround_at_entry[]; extern u32 pnv_fastsleep_workaround_at_exit[]; diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 12f13acee1f6..3116bab10aa3 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -36,6 +36,8 @@ #define P9_STOP_SPR_PSSCR 855 static u32 supported_cpuidle_states; +struct pnv_idle_states_t *pnv_idle_states; +int nr_pnv_idle_states; /* * The default stop state that will be used by ppc_md.power_save @@ -617,48 +619,10 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, - int dt_idle_states) +static int __init pnv_power9_idle_init(void) { - u64 *psscr_val = NULL; - u64 *psscr_mask = NULL; - u32 *residency_ns = NULL; u64 max_residency_ns = 0; - int rc = 0, i; - - psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL); - psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL); - residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns), - GFP_KERNEL); - - if (!psscr_val || !psscr_mask || !residency_ns) { - rc = -1; - goto out; - } - - if (of_property_read_u64_array(np, - "ibm,cpu-idle-state-psscr", - psscr_val, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); - rc = -1; - goto out; - } - - if (of_property_read_u64_array(np, - "ibm,cpu-idle-state-psscr-mask", - psscr_mask, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); - rc = -1; - goto out; - } - - if (of_property_read_u32_array(np, - "ibm,cpu-idle-state-residency-ns", - residency_ns, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); - rc = -1; - goto out; - } + int i; /* * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask}, @@ -674,33 +638,36 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state. */ pnv_first_deep_stop_state = MAX_STOP_STATE; - for (i = 0; i < dt_idle_states; i++) { + for (i = 0; i < nr_pnv_idle_states; i++) { int err; - u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK; + struct pnv_idle_states_t *state = &pnv_idle_states[i]; + u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; - if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) && - (pnv_first_deep_stop_state > psscr_rl)) + if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && + pnv_first_deep_stop_state > psscr_rl) pnv_first_deep_stop_state = psscr_rl; - err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i], - flags[i]); + err = validate_psscr_val_mask(&state->psscr_val, + &state->psscr_mask, + state->flags); if (err) { - report_invalid_psscr_val(psscr_val[i], err); + state->valid = false; + report_invalid_psscr_val(state->psscr_val, err); continue; } - if (max_residency_ns < residency_ns[i]) { - max_residency_ns = residency_ns[i]; - pnv_deepest_stop_psscr_val = psscr_val[i]; - pnv_deepest_stop_psscr_mask = psscr_mask[i]; - pnv_deepest_stop_flag = flags[i]; + if (max_residency_ns < state->residency_ns) { + max_residency_ns = state->residency_ns; + pnv_deepest_stop_psscr_val = state->psscr_val; + pnv_deepest_stop_psscr_mask = state->psscr_mask; + pnv_deepest_stop_flag = state->flags; deepest_stop_found = true; } if (!default_stop_found && - (flags[i] & OPAL_PM_STOP_INST_FAST)) { - pnv_default_stop_val = psscr_val[i]; - pnv_default_stop_mask = psscr_mask[i]; + (state->flags & OPAL_PM_STOP_INST_FAST)) { + pnv_default_stop_val = state->psscr_val; + pnv_default_stop_mask = state->psscr_mask; default_stop_found = true; } } @@ -723,11 +690,8 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n", pnv_first_deep_stop_state); -out: - kfree(psscr_val); - kfree(psscr_mask); - kfree(residency_ns); - return rc; + + return 0; } /* @@ -735,50 +699,146 @@ out: */ static void __init pnv_probe_idle_states(void) { - struct device_node *np; - int dt_idle_states; - u32 *flags = NULL; int i; + if (nr_pnv_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + return; + } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (pnv_power9_idle_init()) + return; + } + + for (i = 0; i < nr_pnv_idle_states; i++) + supported_cpuidle_states |= pnv_idle_states[i].flags; +} + +/* + * This function parses device-tree and populates all the information + * into pnv_idle_states structure. It also sets up nr_pnv_idle_states + * which is the number of cpuidle states discovered through device-tree. + */ + +static int pnv_parse_cpuidle_dt(void) +{ + struct device_node *np; + int nr_idle_states, i; + int rc = 0; + u32 *temp_u32; + u64 *temp_u64; + const char **temp_string; + np = of_find_node_by_path("/ibm,opal/power-mgt"); if (!np) { pr_warn("opal: PowerMgmt Node not found\n"); - goto out; + return -ENODEV; } - dt_idle_states = of_property_count_u32_elems(np, - "ibm,cpu-idle-state-flags"); - if (dt_idle_states < 0) { - pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + nr_idle_states = of_property_count_u32_elems(np, + "ibm,cpu-idle-state-flags"); + + pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), + GFP_KERNEL); + temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); + temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); + temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); + + if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { + pr_err("Could not allocate memory for dt parsing\n"); + rc = -ENOMEM; goto out; } - flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL); - - if (of_property_read_u32_array(np, - "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { + /* Read flags */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", + temp_u32, nr_idle_states)) { pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + rc = -EINVAL; goto out; } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].flags = temp_u32[i]; + + /* Read latencies */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].latency_ns = temp_u32[i]; + + /* Read residencies */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].residency_ns = temp_u32[i]; + /* For power9 */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (pnv_power9_idle_init(np, flags, dt_idle_states)) + /* Read pm_crtl_val */ + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", + temp_u64, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].psscr_val = temp_u64[i]; + + /* Read pm_crtl_mask */ + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", + temp_u64, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); + rc = -EINVAL; goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].psscr_mask = temp_u64[i]; } - for (i = 0; i < dt_idle_states; i++) - supported_cpuidle_states |= flags[i]; + /* + * power8 specific properties ibm,cpu-idle-state-pmicr-mask and + * ibm,cpu-idle-state-pmicr-val were never used and there is no + * plan to use it in near future. Hence, not parsing these properties + */ + if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", + temp_string, nr_idle_states) < 0) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + strncpy(pnv_idle_states[i].name, temp_string[i], + PNV_IDLE_NAME_LEN); + nr_pnv_idle_states = nr_idle_states; + rc = 0; out: - kfree(flags); + kfree(temp_u32); + kfree(temp_u64); + kfree(temp_string); + return rc; } + static int __init pnv_init_idle_states(void) { - + int rc = 0; supported_cpuidle_states = 0; + /* In case we error out nr_pnv_idle_states will be zero */ + nr_pnv_idle_states = 0; if (cpuidle_disable != IDLE_NO_OVERRIDE) goto out; - + rc = pnv_parse_cpuidle_dt(); + if (rc) + return rc; pnv_probe_idle_states(); if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { -- cgit From 1961acad2f88559c2cdd2ef67c58c3627f1f6e54 Mon Sep 17 00:00:00 2001 From: Akshay Adiga Date: Thu, 5 Jul 2018 17:10:22 +0530 Subject: powernv/cpuidle: Use parsed device tree values for cpuidle_init Export pnv_idle_states and nr_pnv_idle_states so that its accessible to cpuidle driver. Use properties from pnv_idle_states structure for powernv cpuidle_init. Signed-off-by: Akshay Adiga Reviewed-by: Nicholas Piggin Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 2 + drivers/cpuidle/cpuidle-powernv.c | 158 ++++++------------------------------- 2 files changed, 28 insertions(+), 132 deletions(-) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 574b0ce1d671..43e5f31fe64d 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -90,6 +90,8 @@ struct pnv_idle_states_t { bool valid; }; +extern struct pnv_idle_states_t *pnv_idle_states; +extern int nr_pnv_idle_states; extern u32 pnv_fastsleep_workaround_at_entry[]; extern u32 pnv_fastsleep_workaround_at_exit[]; diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index d29e4f041efe..84b1ebe212b3 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -242,6 +242,7 @@ static inline void add_powernv_state(int index, const char *name, powernv_states[index].target_residency = target_residency; powernv_states[index].exit_latency = exit_latency; powernv_states[index].enter = idle_fn; + /* For power8 and below psscr_* will be 0 */ stop_psscr_table[index].val = psscr_val; stop_psscr_table[index].mask = psscr_mask; } @@ -263,186 +264,80 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len, extern u32 pnv_get_supported_cpuidle_states(void); static int powernv_add_idle_states(void) { - struct device_node *power_mgt; int nr_idle_states = 1; /* Snooze */ - int dt_idle_states, count; - u32 latency_ns[CPUIDLE_STATE_MAX]; - u32 residency_ns[CPUIDLE_STATE_MAX]; - u32 flags[CPUIDLE_STATE_MAX]; - u64 psscr_val[CPUIDLE_STATE_MAX]; - u64 psscr_mask[CPUIDLE_STATE_MAX]; - const char *names[CPUIDLE_STATE_MAX]; + int dt_idle_states; u32 has_stop_states = 0; - int i, rc; + int i; u32 supported_flags = pnv_get_supported_cpuidle_states(); /* Currently we have snooze statically defined */ - - power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); - if (!power_mgt) { - pr_warn("opal: PowerMgmt Node not found\n"); - goto out; - } - - /* Read values of any property to determine the num of idle states */ - dt_idle_states = of_property_count_u32_elems(power_mgt, "ibm,cpu-idle-state-flags"); - if (dt_idle_states < 0) { - pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + if (nr_pnv_idle_states <= 0) { + pr_warn("cpuidle-powernv : Only Snooze is available\n"); goto out; } - count = of_property_count_u32_elems(power_mgt, - "ibm,cpu-idle-state-latencies-ns"); - - if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states, - "ibm,cpu-idle-state-latencies-ns", - count) != 0) - goto out; - - count = of_property_count_strings(power_mgt, - "ibm,cpu-idle-state-names"); - if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states, - "ibm,cpu-idle-state-names", - count) != 0) - goto out; + /* TODO: Count only states which are eligible for cpuidle */ + dt_idle_states = nr_pnv_idle_states; /* * Since snooze is used as first idle state, max idle states allowed is * CPUIDLE_STATE_MAX -1 */ - if (dt_idle_states > CPUIDLE_STATE_MAX - 1) { + if (nr_pnv_idle_states > CPUIDLE_STATE_MAX - 1) { pr_warn("cpuidle-powernv: discovered idle states more than allowed"); dt_idle_states = CPUIDLE_STATE_MAX - 1; } - if (of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { - pr_warn("cpuidle-powernv : missing ibm,cpu-idle-state-flags in DT\n"); - goto out; - } - - if (of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-latencies-ns", latency_ns, - dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); - goto out; - } - if (of_property_read_string_array(power_mgt, - "ibm,cpu-idle-state-names", names, dt_idle_states) < 0) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); - goto out; - } - /* * If the idle states use stop instruction, probe for psscr values * and psscr mask which are necessary to specify required stop level. */ - has_stop_states = (flags[0] & + has_stop_states = (pnv_idle_states[0].flags & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP)); - if (has_stop_states) { - count = of_property_count_u64_elems(power_mgt, - "ibm,cpu-idle-state-psscr"); - if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", - dt_idle_states, - "ibm,cpu-idle-state-psscr", - count) != 0) - goto out; - - count = of_property_count_u64_elems(power_mgt, - "ibm,cpu-idle-state-psscr-mask"); - if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", - dt_idle_states, - "ibm,cpu-idle-state-psscr-mask", - count) != 0) - goto out; - - if (of_property_read_u64_array(power_mgt, - "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); - goto out; - } - - if (of_property_read_u64_array(power_mgt, - "ibm,cpu-idle-state-psscr-mask", - psscr_mask, dt_idle_states)) { - pr_warn("cpuidle-powernv:Missing ibm,cpu-idle-state-psscr-mask in DT\n"); - goto out; - } - } - - count = of_property_count_u32_elems(power_mgt, - "ibm,cpu-idle-state-residency-ns"); - - if (count < 0) { - rc = count; - } else if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", - dt_idle_states, - "ibm,cpu-idle-state-residency-ns", - count) != 0) { - goto out; - } else { - rc = of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-residency-ns", - residency_ns, dt_idle_states); - } for (i = 0; i < dt_idle_states; i++) { unsigned int exit_latency, target_residency; bool stops_timebase = false; + struct pnv_idle_states_t *state = &pnv_idle_states[i]; /* * Skip the platform idle state whose flag isn't in * the supported_cpuidle_states flag mask. */ - if ((flags[i] & supported_flags) != flags[i]) + if ((state->flags & supported_flags) != state->flags) continue; /* * If an idle state has exit latency beyond * POWERNV_THRESHOLD_LATENCY_NS then don't use it * in cpu-idle. */ - if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS) + if (state->latency_ns > POWERNV_THRESHOLD_LATENCY_NS) continue; /* * Firmware passes residency and latency values in ns. * cpuidle expects it in us. */ - exit_latency = DIV_ROUND_UP(latency_ns[i], 1000); - if (!rc) - target_residency = DIV_ROUND_UP(residency_ns[i], 1000); - else - target_residency = 0; - - if (has_stop_states) { - int err = validate_psscr_val_mask(&psscr_val[i], - &psscr_mask[i], - flags[i]); - if (err) { - report_invalid_psscr_val(psscr_val[i], err); + exit_latency = DIV_ROUND_UP(state->latency_ns, 1000); + target_residency = DIV_ROUND_UP(state->residency_ns, 1000); + + if (has_stop_states && !(state->valid)) continue; - } - } - if (flags[i] & OPAL_PM_TIMEBASE_STOP) + if (state->flags & OPAL_PM_TIMEBASE_STOP) stops_timebase = true; - /* - * For nap and fastsleep, use default target_residency - * values if f/w does not expose it. - */ - if (flags[i] & OPAL_PM_NAP_ENABLED) { - if (!rc) - target_residency = 100; + if (state->flags & OPAL_PM_NAP_ENABLED) { /* Add NAP state */ add_powernv_state(nr_idle_states, "Nap", CPUIDLE_FLAG_NONE, nap_loop, target_residency, exit_latency, 0, 0); } else if (has_stop_states && !stops_timebase) { - add_powernv_state(nr_idle_states, names[i], + add_powernv_state(nr_idle_states, state->name, CPUIDLE_FLAG_NONE, stop_loop, target_residency, exit_latency, - psscr_val[i], psscr_mask[i]); + state->psscr_val, + state->psscr_mask); } /* @@ -450,20 +345,19 @@ static int powernv_add_idle_states(void) * within this config dependency check. */ #ifdef CONFIG_TICK_ONESHOT - else if (flags[i] & OPAL_PM_SLEEP_ENABLED || - flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { - if (!rc) - target_residency = 300000; + else if (state->flags & OPAL_PM_SLEEP_ENABLED || + state->flags & OPAL_PM_SLEEP_ENABLED_ER1) { /* Add FASTSLEEP state */ add_powernv_state(nr_idle_states, "FastSleep", CPUIDLE_FLAG_TIMER_STOP, fastsleep_loop, target_residency, exit_latency, 0, 0); } else if (has_stop_states && stops_timebase) { - add_powernv_state(nr_idle_states, names[i], + add_powernv_state(nr_idle_states, state->name, CPUIDLE_FLAG_TIMER_STOP, stop_loop, target_residency, exit_latency, - psscr_val[i], psscr_mask[i]); + state->psscr_val, + state->psscr_mask); } #endif else -- cgit From 04baaf28f40c68c35a413cd9d0db7139c67e6caf Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Tue, 24 Jul 2018 14:43:08 +0530 Subject: powerpc/powernv: Add support to enable sensor groups Adds support to enable/disable a sensor group at runtime. This can be used to select the sensor groups that needs to be copied to main memory by OCC. Sensor groups like power, temperature, current, voltage, frequency, utilization can be enabled/disabled at runtime. Signed-off-by: Shilpasri G Bhat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 1 + arch/powerpc/include/asm/opal.h | 2 ++ .../powerpc/platforms/powernv/opal-sensor-groups.c | 28 ++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 4 files changed, 32 insertions(+) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 3bab299eda49..56a94a1bd754 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -206,6 +206,7 @@ #define OPAL_NPU_SPA_CLEAR_CACHE 160 #define OPAL_NPU_TL_SET 161 #define OPAL_SENSOR_READ_U64 162 +#define OPAL_SENSOR_GROUP_ENABLE 163 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 #define OPAL_LAST 165 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index cd1acc8be8ee..2e81555de643 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -292,6 +292,7 @@ int opal_set_powercap(u32 handle, int token, u32 pcap); int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr); int opal_set_power_shift_ratio(u32 handle, int token, u32 psr); int opal_sensor_group_clear(u32 group_hndl, int token); +int opal_sensor_group_enable(u32 group_hndl, int token, bool enable); s64 opal_signal_system_reset(s32 cpu); s64 opal_quiesce(u64 shutdown_type, s32 cpu); @@ -328,6 +329,7 @@ extern int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg); extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data); extern int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data); +extern int sensor_group_enable(u32 grp_hndl, bool enable); struct rtc_time; extern time64_t opal_get_boot_time(void); diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index 541c9ea04a32..f7d04b6a2d7a 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -32,6 +32,34 @@ static struct sensor_group { struct sg_attr *sgattrs; } *sgs; +int sensor_group_enable(u32 handle, bool enable) +{ + struct opal_msg msg; + int token, ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) + return token; + + ret = opal_sensor_group_enable(handle, token, enable); + if (ret == OPAL_ASYNC_COMPLETION) { + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + } else { + ret = opal_error_code(ret); + } + +out: + opal_async_release_token(token); + return ret; +} +EXPORT_SYMBOL_GPL(sensor_group_enable); + static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3508be7d758a..029b37c04f35 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -329,3 +329,4 @@ OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR); OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64); +OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE); -- cgit From e0da99123f3c80f679d1b40a4321c1478bef14f7 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Tue, 24 Jul 2018 14:43:09 +0530 Subject: hwmon: (ibmpowernv) Add attributes to enable/disable sensor groups OPAL firmware provides the facility for some groups of sensors to be enabled/disabled at runtime to give the user the option of using the system resources for collecting these sensors or not. For example, on POWER9 systems, the On Chip Controller (OCC) gathers various system and chip level sensors and maintains their values in main memory. This patch provides support for enabling/disabling the sensor groups like power, temperature, current and voltage. Signed-off-by: Shilpasri G Bhat [stewart@linux.vnet.ibm.com: Commit message] Acked-by: Guenter Roeck Signed-off-by: Michael Ellerman --- Documentation/hwmon/ibmpowernv | 43 +++++++- drivers/hwmon/ibmpowernv.c | 238 +++++++++++++++++++++++++++++++++++------ 2 files changed, 247 insertions(+), 34 deletions(-) diff --git a/Documentation/hwmon/ibmpowernv b/Documentation/hwmon/ibmpowernv index 8826ba29db36..56468258711f 100644 --- a/Documentation/hwmon/ibmpowernv +++ b/Documentation/hwmon/ibmpowernv @@ -33,9 +33,48 @@ fanX_input Measured RPM value. fanX_min Threshold RPM for alert generation. fanX_fault 0: No fail condition 1: Failing fan + tempX_input Measured ambient temperature. tempX_max Threshold ambient temperature for alert generation. -inX_input Measured power supply voltage +tempX_highest Historical maximum temperature +tempX_lowest Historical minimum temperature +tempX_enable Enable/disable all temperature sensors belonging to the + sub-group. In POWER9, this attribute corresponds to + each OCC. Using this attribute each OCC can be asked to + disable/enable all of its temperature sensors. + 1: Enable + 0: Disable + +inX_input Measured power supply voltage (millivolt) inX_fault 0: No fail condition. 1: Failing power supply. -power1_input System power consumption (microWatt) +inX_highest Historical maximum voltage +inX_lowest Historical minimum voltage +inX_enable Enable/disable all voltage sensors belonging to the + sub-group. In POWER9, this attribute corresponds to + each OCC. Using this attribute each OCC can be asked to + disable/enable all of its voltage sensors. + 1: Enable + 0: Disable + +powerX_input Power consumption (microWatt) +powerX_input_highest Historical maximum power +powerX_input_lowest Historical minimum power +powerX_enable Enable/disable all power sensors belonging to the + sub-group. In POWER9, this attribute corresponds to + each OCC. Using this attribute each OCC can be asked to + disable/enable all of its power sensors. + 1: Enable + 0: Disable + +currX_input Measured current (milliampere) +currX_highest Historical maximum current +currX_lowest Historical minimum current +currX_enable Enable/disable all current sensors belonging to the + sub-group. In POWER9, this attribute corresponds to + each OCC. Using this attribute each OCC can be asked to + disable/enable all of its current sensors. + 1: Enable + 0: Disable + +energyX_input Cumulative energy (microJoule) diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c index f829dadfd5a0..83472808c816 100644 --- a/drivers/hwmon/ibmpowernv.c +++ b/drivers/hwmon/ibmpowernv.c @@ -90,11 +90,20 @@ struct sensor_data { char label[MAX_LABEL_LEN]; char name[MAX_ATTR_LEN]; struct device_attribute dev_attr; + struct sensor_group_data *sgrp_data; +}; + +struct sensor_group_data { + struct mutex mutex; + u32 gid; + bool enable; }; struct platform_data { const struct attribute_group *attr_groups[MAX_SENSOR_TYPE + 1]; + struct sensor_group_data *sgrp_data; u32 sensors_count; /* Total count of sensors from each group */ + u32 nr_sensor_groups; /* Total number of sensor groups */ }; static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr, @@ -105,6 +114,9 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr, ssize_t ret; u64 x; + if (sdata->sgrp_data && !sdata->sgrp_data->enable) + return -ENODATA; + ret = opal_get_sensor_data_u64(sdata->id, &x); if (ret) @@ -120,6 +132,46 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr, return sprintf(buf, "%llu\n", x); } +static ssize_t show_enable(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + struct sensor_data *sdata = container_of(devattr, struct sensor_data, + dev_attr); + + return sprintf(buf, "%u\n", sdata->sgrp_data->enable); +} + +static ssize_t store_enable(struct device *dev, + struct device_attribute *devattr, + const char *buf, size_t count) +{ + struct sensor_data *sdata = container_of(devattr, struct sensor_data, + dev_attr); + struct sensor_group_data *sgrp_data = sdata->sgrp_data; + int ret; + bool data; + + ret = kstrtobool(buf, &data); + if (ret) + return ret; + + ret = mutex_lock_interruptible(&sgrp_data->mutex); + if (ret) + return ret; + + if (data != sgrp_data->enable) { + ret = sensor_group_enable(sgrp_data->gid, data); + if (!ret) + sgrp_data->enable = data; + } + + if (!ret) + ret = count; + + mutex_unlock(&sgrp_data->mutex); + return ret; +} + static ssize_t show_label(struct device *dev, struct device_attribute *devattr, char *buf) { @@ -292,12 +344,115 @@ static u32 get_sensor_hwmon_index(struct sensor_data *sdata, return ++sensor_groups[sdata->type].hwmon_index; } +static int init_sensor_group_data(struct platform_device *pdev, + struct platform_data *pdata) +{ + struct sensor_group_data *sgrp_data; + struct device_node *groups, *sgrp; + int count = 0, ret = 0; + enum sensors type; + + groups = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group"); + if (!groups) + return ret; + + for_each_child_of_node(groups, sgrp) { + type = get_sensor_type(sgrp); + if (type != MAX_SENSOR_TYPE) + pdata->nr_sensor_groups++; + } + + if (!pdata->nr_sensor_groups) + goto out; + + sgrp_data = devm_kcalloc(&pdev->dev, pdata->nr_sensor_groups, + sizeof(*sgrp_data), GFP_KERNEL); + if (!sgrp_data) { + ret = -ENOMEM; + goto out; + } + + for_each_child_of_node(groups, sgrp) { + u32 gid; + + type = get_sensor_type(sgrp); + if (type == MAX_SENSOR_TYPE) + continue; + + if (of_property_read_u32(sgrp, "sensor-group-id", &gid)) + continue; + + if (of_count_phandle_with_args(sgrp, "sensors", NULL) <= 0) + continue; + + sensor_groups[type].attr_count++; + sgrp_data[count].gid = gid; + mutex_init(&sgrp_data[count].mutex); + sgrp_data[count++].enable = false; + } + + pdata->sgrp_data = sgrp_data; +out: + of_node_put(groups); + return ret; +} + +static struct sensor_group_data *get_sensor_group(struct platform_data *pdata, + struct device_node *node, + enum sensors gtype) +{ + struct sensor_group_data *sgrp_data = pdata->sgrp_data; + struct device_node *groups, *sgrp; + + groups = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group"); + if (!groups) + return NULL; + + for_each_child_of_node(groups, sgrp) { + struct of_phandle_iterator it; + u32 gid; + int rc, i; + enum sensors type; + + type = get_sensor_type(sgrp); + if (type != gtype) + continue; + + if (of_property_read_u32(sgrp, "sensor-group-id", &gid)) + continue; + + of_for_each_phandle(&it, rc, sgrp, "sensors", NULL, 0) + if (it.phandle == node->phandle) { + of_node_put(it.node); + break; + } + + if (rc) + continue; + + for (i = 0; i < pdata->nr_sensor_groups; i++) + if (gid == sgrp_data[i].gid) { + of_node_put(sgrp); + of_node_put(groups); + return &sgrp_data[i]; + } + } + + of_node_put(groups); + return NULL; +} + static int populate_attr_groups(struct platform_device *pdev) { struct platform_data *pdata = platform_get_drvdata(pdev); const struct attribute_group **pgroups = pdata->attr_groups; struct device_node *opal, *np; enum sensors type; + int ret; + + ret = init_sensor_group_data(pdev, pdata); + if (ret) + return ret; opal = of_find_node_by_path("/ibm,opal/sensors"); for_each_child_of_node(opal, np) { @@ -344,7 +499,10 @@ static int populate_attr_groups(struct platform_device *pdev) static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name, ssize_t (*show)(struct device *dev, struct device_attribute *attr, - char *buf)) + char *buf), + ssize_t (*store)(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count)) { snprintf(sdata->name, MAX_ATTR_LEN, "%s%d_%s", sensor_groups[sdata->type].name, sdata->hwmon_index, @@ -352,23 +510,33 @@ static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name, sysfs_attr_init(&sdata->dev_attr.attr); sdata->dev_attr.attr.name = sdata->name; - sdata->dev_attr.attr.mode = S_IRUGO; sdata->dev_attr.show = show; + if (store) { + sdata->dev_attr.store = store; + sdata->dev_attr.attr.mode = 0664; + } else { + sdata->dev_attr.attr.mode = 0444; + } } static void populate_sensor(struct sensor_data *sdata, int od, int hd, int sid, const char *attr_name, enum sensors type, const struct attribute_group *pgroup, + struct sensor_group_data *sgrp_data, ssize_t (*show)(struct device *dev, struct device_attribute *attr, - char *buf)) + char *buf), + ssize_t (*store)(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count)) { sdata->id = sid; sdata->type = type; sdata->opal_index = od; sdata->hwmon_index = hd; - create_hwmon_attr(sdata, attr_name, show); + create_hwmon_attr(sdata, attr_name, show, store); pgroup->attrs[sensor_groups[type].attr_count++] = &sdata->dev_attr.attr; + sdata->sgrp_data = sgrp_data; } static char *get_max_attr(enum sensors type) @@ -403,24 +571,23 @@ static int create_device_attrs(struct platform_device *pdev) const struct attribute_group **pgroups = pdata->attr_groups; struct device_node *opal, *np; struct sensor_data *sdata; - u32 sensor_id; - enum sensors type; u32 count = 0; - int err = 0; + u32 group_attr_id[MAX_SENSOR_TYPE] = {0}; - opal = of_find_node_by_path("/ibm,opal/sensors"); sdata = devm_kcalloc(&pdev->dev, pdata->sensors_count, sizeof(*sdata), GFP_KERNEL); - if (!sdata) { - err = -ENOMEM; - goto exit_put_node; - } + if (!sdata) + return -ENOMEM; + opal = of_find_node_by_path("/ibm,opal/sensors"); for_each_child_of_node(opal, np) { + struct sensor_group_data *sgrp_data; const char *attr_name; - u32 opal_index; + u32 opal_index, hw_id; + u32 sensor_id; const char *label; + enum sensors type; if (np->name == NULL) continue; @@ -456,14 +623,12 @@ static int create_device_attrs(struct platform_device *pdev) opal_index = INVALID_INDEX; } - sdata[count].opal_index = opal_index; - sdata[count].hwmon_index = - get_sensor_hwmon_index(&sdata[count], sdata, count); - - create_hwmon_attr(&sdata[count], attr_name, show_sensor); - - pgroups[type]->attrs[sensor_groups[type].attr_count++] = - &sdata[count++].dev_attr.attr; + hw_id = get_sensor_hwmon_index(&sdata[count], sdata, count); + sgrp_data = get_sensor_group(pdata, np, type); + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id, + attr_name, type, pgroups[type], sgrp_data, + show_sensor, NULL); + count++; if (!of_property_read_string(np, "label", &label)) { /* @@ -474,35 +639,43 @@ static int create_device_attrs(struct platform_device *pdev) */ make_sensor_label(np, &sdata[count], label); - populate_sensor(&sdata[count], opal_index, - sdata[count - 1].hwmon_index, + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id, "label", type, pgroups[type], - show_label); + NULL, show_label, NULL); count++; } if (!of_property_read_u32(np, "sensor-data-max", &sensor_id)) { attr_name = get_max_attr(type); - populate_sensor(&sdata[count], opal_index, - sdata[count - 1].hwmon_index, + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id, attr_name, type, - pgroups[type], show_sensor); + pgroups[type], sgrp_data, show_sensor, + NULL); count++; } if (!of_property_read_u32(np, "sensor-data-min", &sensor_id)) { attr_name = get_min_attr(type); - populate_sensor(&sdata[count], opal_index, - sdata[count - 1].hwmon_index, + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id, attr_name, type, - pgroups[type], show_sensor); + pgroups[type], sgrp_data, show_sensor, + NULL); + count++; + } + + if (sgrp_data && !sgrp_data->enable) { + sgrp_data->enable = true; + hw_id = ++group_attr_id[type]; + populate_sensor(&sdata[count], opal_index, hw_id, + sgrp_data->gid, "enable", type, + pgroups[type], sgrp_data, show_enable, + store_enable); count++; } } -exit_put_node: of_node_put(opal); - return err; + return 0; } static int ibmpowernv_probe(struct platform_device *pdev) @@ -517,6 +690,7 @@ static int ibmpowernv_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pdata); pdata->sensors_count = 0; + pdata->nr_sensor_groups = 0; err = populate_attr_groups(pdev); if (err) return err; -- cgit From b87b9cf4935325c98522823caeddd333022a1c62 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Mon, 30 Jul 2018 11:59:14 +1000 Subject: powerpc/pseries: fix EEH recovery of some IOV devices EEH recovery currently fails on pSeries for some IOV capable PCI devices, if CONFIG_PCI_IOV is on and the hypervisor doesn't provide certain device tree properties for the device. (Found on an IOV capable device using the ipr driver.) Recovery fails in pci_enable_resources() at the check on r->parent, because r->flags is set and r->parent is not. This state is due to sriov_init() setting the start, end and flags members of the IOV BARs but the parent not being set later in pseries_pci_fixup_iov_resources(), because the "ibm,open-sriov-vf-bar-info" property is missing. Correct this by zeroing the resource flags for IOV BARs when they can't be configured (this is the same method used by sriov_init() and __pci_read_base()). VFs cleared this way can't be enabled later, because that requires another device tree property, "ibm,number-of-configurable-vfs" as well as support for the RTAS function "ibm_map_pes". These are all part of hypervisor support for IOV and it seems unlikely that a hypervisor would ever partially, but not fully, support it. (None are currently provided by QEMU/KVM.) Signed-off-by: Sam Bobroff Reviewed-by: Bryant G. Ly Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/setup.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e14ccf32a97d..9948ad16f788 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -647,6 +647,15 @@ void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) } } +static void pseries_disable_sriov_resources(struct pci_dev *pdev) +{ + int i; + + pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n"); + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) + pdev->resource[i + PCI_IOV_RESOURCES].flags = 0; +} + static void pseries_pci_fixup_resources(struct pci_dev *pdev) { const int *indexes; @@ -654,10 +663,10 @@ static void pseries_pci_fixup_resources(struct pci_dev *pdev) /*Firmware must support open sriov otherwise dont configure*/ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); - if (!indexes) - return; - /* Assign the addresses from device tree*/ - of_pci_set_vf_bar_size(pdev, indexes); + if (indexes) + of_pci_set_vf_bar_size(pdev, indexes); + else + pseries_disable_sriov_resources(pdev); } static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) @@ -669,10 +678,10 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) return; /*Firmware must support open sriov otherwise dont configure*/ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); - if (!indexes) - return; - /* Assign the addresses from device tree*/ - of_pci_parse_iov_addrs(pdev, indexes); + if (indexes) + of_pci_parse_iov_addrs(pdev, indexes); + else + pseries_disable_sriov_resources(pdev); } static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev, -- cgit From 3127692deba6eeb7ed6d416b25e91f179e17be8f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Aug 2018 01:39:51 +1000 Subject: powernv/cpuidle: Fix idle states all being marked invalid Commit 9c7b185ab2fe ("powernv/cpuidle: Parse dt idle properties into global structure") parses dt idle states into structs, but never marks them valid. This results in all idle states being lost. Fixes: 9c7b185ab2fe ("powernv/cpuidle: Parse dt idle properties into global structure") Signed-off-by: Nicholas Piggin Acked-by: Akshay Adiga Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/idle.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 3116bab10aa3..ecb002c5db83 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -651,11 +651,12 @@ static int __init pnv_power9_idle_init(void) &state->psscr_mask, state->flags); if (err) { - state->valid = false; report_invalid_psscr_val(state->psscr_val, err); continue; } + state->valid = true; + if (max_residency_ns < state->residency_ns) { max_residency_ns = state->residency_ns; pnv_deepest_stop_psscr_val = state->psscr_val; -- cgit From 6e0495c2e8ac39b1aad0a4588fe64413ce9028c0 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 30 Jul 2018 18:44:14 -0700 Subject: powerpc/4xx: Fix error return path in ppc4xx_msi_probe() An arbitrary error in ppc4xx_msi_probe() quite likely results in a crash similar to the following, seen after dma_alloc_coherent() returned an error. Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc001bff0 Oops: Kernel access of bad area, sig: 11 [#1] BE Canyonlands Modules linked in: CPU: 0 PID: 1 Comm: swapper Tainted: G W 4.18.0-rc6-00010-gff33d1030a6c #1 NIP: c001bff0 LR: c001c418 CTR: c01faa7c REGS: cf82db40 TRAP: 0300 Tainted: G W (4.18.0-rc6-00010-gff33d1030a6c) MSR: 00029000 CR: 28002024 XER: 00000000 DEAR: 00000000 ESR: 00000000 GPR00: c001c418 cf82dbf0 cf828000 cf8de400 00000000 00000000 000000c4 000000c4 GPR08: c0481ea4 00000000 00000000 000000c4 22002024 00000000 c00025e8 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 c0492380 0000004a GPR24: 00029000 0000000c 00000000 cf8de410 c0494d60 c0494d60 cf8bebc0 00000001 NIP [c001bff0] ppc4xx_of_msi_remove+0x48/0xa0 LR [c001c418] ppc4xx_msi_probe+0x294/0x3b8 Call Trace: [cf82dbf0] [00029000] 0x29000 (unreliable) [cf82dc10] [c001c418] ppc4xx_msi_probe+0x294/0x3b8 [cf82dc70] [c0209fbc] platform_drv_probe+0x40/0x9c [cf82dc90] [c0208240] driver_probe_device+0x2a8/0x350 [cf82dcc0] [c0206204] bus_for_each_drv+0x60/0xac [cf82dcf0] [c0207e88] __device_attach+0xe8/0x160 [cf82dd20] [c02071e0] bus_probe_device+0xa0/0xbc [cf82dd40] [c02050c8] device_add+0x404/0x5c4 [cf82dd90] [c0288978] of_platform_device_create_pdata+0x88/0xd8 [cf82ddb0] [c0288b70] of_platform_bus_create+0x134/0x220 [cf82de10] [c0288bcc] of_platform_bus_create+0x190/0x220 [cf82de70] [c0288cf4] of_platform_bus_probe+0x98/0xec [cf82de90] [c0449650] __machine_initcall_canyonlands_ppc460ex_device_probe+0x38/0x54 [cf82dea0] [c0002404] do_one_initcall+0x40/0x188 [cf82df00] [c043daec] kernel_init_freeable+0x130/0x1d0 [cf82df30] [c0002600] kernel_init+0x18/0x104 [cf82df40] [c000c23c] ret_from_kernel_thread+0x14/0x1c Instruction dump: 90010024 813d0024 2f890000 83c30058 41bd0014 48000038 813d0024 7f89f800 409d002c 813e000c 57ea103a 3bff0001 <7c69502e> 2f830000 419effe0 4803b26d ---[ end trace 8cf551077ecfc42a ]--- Fix it up. Specifically, - Return valid error codes from ppc4xx_setup_pcieh_hw(), have it clean up after itself, and only access hardware after all possible error conditions have been handled. - Use devm_kzalloc() instead of kzalloc() in ppc4xx_msi_probe() Signed-off-by: Guenter Roeck Reviewed-by: Christoph Hellwig Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/4xx/msi.c | 51 +++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c index 81b2cbce7df8..7c324eff2f22 100644 --- a/arch/powerpc/platforms/4xx/msi.c +++ b/arch/powerpc/platforms/4xx/msi.c @@ -146,13 +146,19 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, const u32 *sdr_addr; dma_addr_t msi_phys; void *msi_virt; + int err; sdr_addr = of_get_property(dev->dev.of_node, "sdr-base", NULL); if (!sdr_addr) - return -1; + return -EINVAL; - mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start)); /*HIGH addr */ - mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start)); /* Low addr */ + msi_data = of_get_property(dev->dev.of_node, "msi-data", NULL); + if (!msi_data) + return -EINVAL; + + msi_mask = of_get_property(dev->dev.of_node, "msi-mask", NULL); + if (!msi_mask) + return -EINVAL; msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi"); if (!msi->msi_dev) @@ -160,30 +166,30 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, msi->msi_regs = of_iomap(msi->msi_dev, 0); if (!msi->msi_regs) { - dev_err(&dev->dev, "of_iomap problem failed\n"); - return -ENOMEM; + dev_err(&dev->dev, "of_iomap failed\n"); + err = -ENOMEM; + goto node_put; } dev_dbg(&dev->dev, "PCIE-MSI: msi register mapped 0x%x 0x%x\n", (u32) (msi->msi_regs + PEIH_TERMADH), (u32) (msi->msi_regs)); msi_virt = dma_alloc_coherent(&dev->dev, 64, &msi_phys, GFP_KERNEL); - if (!msi_virt) - return -ENOMEM; + if (!msi_virt) { + err = -ENOMEM; + goto iounmap; + } msi->msi_addr_hi = upper_32_bits(msi_phys); msi->msi_addr_lo = lower_32_bits(msi_phys & 0xffffffff); dev_dbg(&dev->dev, "PCIE-MSI: msi address high 0x%x, low 0x%x\n", msi->msi_addr_hi, msi->msi_addr_lo); + mtdcri(SDR0, *sdr_addr, upper_32_bits(res.start)); /*HIGH addr */ + mtdcri(SDR0, *sdr_addr + 1, lower_32_bits(res.start)); /* Low addr */ + /* Progam the Interrupt handler Termination addr registers */ out_be32(msi->msi_regs + PEIH_TERMADH, msi->msi_addr_hi); out_be32(msi->msi_regs + PEIH_TERMADL, msi->msi_addr_lo); - msi_data = of_get_property(dev->dev.of_node, "msi-data", NULL); - if (!msi_data) - return -1; - msi_mask = of_get_property(dev->dev.of_node, "msi-mask", NULL); - if (!msi_mask) - return -1; /* Program MSI Expected data and Mask bits */ out_be32(msi->msi_regs + PEIH_MSIED, *msi_data); out_be32(msi->msi_regs + PEIH_MSIMK, *msi_mask); @@ -191,6 +197,12 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, dma_free_coherent(&dev->dev, 64, msi_virt, msi_phys); return 0; + +iounmap: + iounmap(msi->msi_regs); +node_put: + of_node_put(msi->msi_dev); + return err; } static int ppc4xx_of_msi_remove(struct platform_device *dev) @@ -209,7 +221,6 @@ static int ppc4xx_of_msi_remove(struct platform_device *dev) msi_bitmap_free(&msi->bitmap); iounmap(msi->msi_regs); of_node_put(msi->msi_dev); - kfree(msi); return 0; } @@ -223,18 +234,16 @@ static int ppc4xx_msi_probe(struct platform_device *dev) dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); - msi = kzalloc(sizeof(*msi), GFP_KERNEL); - if (!msi) { - dev_err(&dev->dev, "No memory for MSI structure\n"); + msi = devm_kzalloc(&dev->dev, sizeof(*msi), GFP_KERNEL); + if (!msi) return -ENOMEM; - } dev->dev.platform_data = msi; /* Get MSI ranges */ err = of_address_to_resource(dev->dev.of_node, 0, &res); if (err) { dev_err(&dev->dev, "%pOF resource error!\n", dev->dev.of_node); - goto error_out; + return err; } msi_irqs = of_irq_count(dev->dev.of_node); @@ -243,7 +252,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev) err = ppc4xx_setup_pcieh_hw(dev, res, msi); if (err) - goto error_out; + return err; err = ppc4xx_msi_init_allocator(dev, msi); if (err) { @@ -256,7 +265,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev) phb->controller_ops.setup_msi_irqs = ppc4xx_setup_msi_irqs; phb->controller_ops.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; } - return err; + return 0; error_out: ppc4xx_of_msi_remove(dev); -- cgit From 06832fc004815b4b43628d21fc81715e8e7cadff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 30 Jul 2018 09:37:21 +0200 Subject: powerpc: Do not redefine NEED_DMA_MAP_STATE kernel/dma/Kconfig already defines NEED_DMA_MAP_STATE, just select it from CONFIG_PPC using the same condition as an if guard. Signed-off-by: Christoph Hellwig [mpe: Move it under PPC] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5eb4d969afbf..ee38fce075ee 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -226,6 +226,7 @@ config PPC select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA + select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH select NO_BOOTMEM select OF @@ -885,9 +886,6 @@ config ZONE_DMA bool default y -config NEED_DMA_MAP_STATE - def_bool (PPC64 || NOT_COHERENT_CACHE) - config GENERIC_ISA_DMA bool depends on ISA_DMA_API -- cgit From 9eab9901b015f489199105c470de1ffc337cfabb Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Thu, 2 Aug 2018 23:03:36 -0500 Subject: powerpc/powernv: Fix concurrency issue with npu->mmio_atsd_usage We've encountered a performance issue when multiple processors stress {get,put}_mmio_atsd_reg(). These functions contend for mmio_atsd_usage, an unsigned long used as a bitmask. The accesses to mmio_atsd_usage are done using test_and_set_bit_lock() and clear_bit_unlock(). As implemented, both of these will require a (successful) stwcx to that same cache line. What we end up with is thread A, attempting to unlock, being slowed by other threads repeatedly attempting to lock. A's stwcx instructions fail and retry because the memory reservation is lost every time a different thread beats it to the punch. There may be a long-term way to fix this at a larger scale, but for now resolve the immediate problem by gating our call to test_and_set_bit_lock() with one to test_bit(), which is obviously implemented without using a store. Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2") Signed-off-by: Reza Arbab Acked-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 458315319bd3..8006c54a91e3 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -440,8 +440,9 @@ static int get_mmio_atsd_reg(struct npu *npu) int i; for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) - return i; + if (!test_bit(i, &npu->mmio_atsd_usage)) + if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) + return i; } return -ENOSPC; -- cgit From 4da1f79227ad42550e3e70e7b4812ec019be3b6e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Aug 2018 20:30:49 +1000 Subject: powerpc/64: Disable irq restore warning for now We recently added a warning in arch_local_irq_restore() to check that the soft masking state matches reality. Unfortunately it trips in a few places, which are not entirely trivial to fix. The key problem is if we're doing function_graph tracing of restore_math(), the warning pops and then seems to recurse. It's not entirely clear because the system continuously oopses on all CPUs, with the output interleaved and unreadable. It's also been observed on a G5 coming out of idle. Until we can fix those cases disable the warning for now. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/irq.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ca941f1e83a9..916ddc4aac44 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -261,9 +261,16 @@ notrace void arch_local_irq_restore(unsigned long mask) */ irq_happened = get_irq_happened(); if (!irq_happened) { -#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(!(mfmsr() & MSR_EE)); -#endif + /* + * FIXME. Here we'd like to be able to do: + * + * #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + * WARN_ON(!(mfmsr() & MSR_EE)); + * #endif + * + * But currently it hits in a few paths, we should fix those and + * enable the warning. + */ return; } -- cgit From 7cd129b4b5370030a5c0b8031a54b2d1d92fa546 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2018 00:03:46 +1000 Subject: powerpc: Add a checkpatch wrapper with our preferred settings This makes it easy to run checkpatch with settings that I like. Usage is eg: $ ./arch/powerpc/tools/checkpatch.sh -g origin/master.. To check all commits since origin/master. Signed-off-by: Michael Ellerman Reviewed-by: Russell Currey --- arch/powerpc/tools/checkpatch.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100755 arch/powerpc/tools/checkpatch.sh diff --git a/arch/powerpc/tools/checkpatch.sh b/arch/powerpc/tools/checkpatch.sh new file mode 100755 index 000000000000..1fad3fb90e7c --- /dev/null +++ b/arch/powerpc/tools/checkpatch.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# Copyright 2018, Michael Ellerman, IBM Corporation. +# +# Wrapper around checkpatch that uses our preferred settings + +script_base=$(realpath $(dirname $0)) + +exec $script_base/../../../scripts/checkpatch.pl \ + --subjective \ + --no-summary \ + --max-line-length=90 \ + --show-types \ + --ignore ARCH_INCLUDE_LINUX \ + --ignore BIT_MACRO \ + --ignore COMPARISON_TO_NULL \ + --ignore EMAIL_SUBJECT \ + --ignore FILE_PATH_CHANGES \ + --ignore GLOBAL_INITIALISERS \ + --ignore LINE_SPACING \ + --ignore MULTIPLE_ASSIGNMENTS \ + $@ -- cgit From 95f9b3af401f5b4daeb908a2c658e820e969f4e3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 22:24:57 +1000 Subject: selftests/powerpc: Add a helper for checking if we're on ppc64le Some of our selftests have only been tested on ppc64le and crash or behave weirdly on ppc64/ppc32. So add a helper for checking the UTS machine. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/include/utils.h | 2 ++ tools/testing/selftests/powerpc/utils.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 735815b3ad7f..c58c370828b4 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -48,6 +48,8 @@ static inline bool have_hwcap2(unsigned long ftr2) } #endif +bool is_ppc64le(void); + /* Yes, this is evil */ #define FAIL_IF(x) \ do { \ diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index d46916867a6f..aa8fc1e6365b 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -11,8 +11,10 @@ #include #include #include +#include #include #include +#include #include #include "utils.h" @@ -104,3 +106,18 @@ int pick_online_cpu(void) printf("No cpus in affinity mask?!\n"); return -1; } + +bool is_ppc64le(void) +{ + struct utsname uts; + int rc; + + errno = 0; + rc = uname(&uts); + if (rc) { + perror("uname"); + return false; + } + + return strcmp(uts.machine, "ppc64le") == 0; +} -- cgit From 1cdc6c14b0ddef699c947babd93992a8c0e874ed Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 22:24:58 +1000 Subject: selftests/powerpc: Only run some tests on ppc64le These tests are currently failing on (some) big endian systems. Until we can fix that, skip them unless we're on ppc64le. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/tm-sigreturn.c | 1 + tools/testing/selftests/powerpc/tm/tm-tar.c | 1 + tools/testing/selftests/powerpc/tm/tm-vmxcopy.c | 1 + 3 files changed, 3 insertions(+) diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c index 85d63449243b..9a6017a1d769 100644 --- a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c +++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c @@ -55,6 +55,7 @@ int tm_sigreturn(void) uint64_t ret = 0; SKIP_IF(!have_htm()); + SKIP_IF(!is_ppc64le()); memset(&sa, 0, sizeof(sa)); sa.sa_handler = handler; diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c index 2d2fcc2b7a60..f31fe5a28ddb 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tar.c +++ b/tools/testing/selftests/powerpc/tm/tm-tar.c @@ -26,6 +26,7 @@ int test_tar(void) int i; SKIP_IF(!have_htm()); + SKIP_IF(!is_ppc64le()); for (i = 0; i < num_loops; i++) { diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c index 0274de7b11f3..fe52811584ae 100644 --- a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c +++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c @@ -46,6 +46,7 @@ int test_vmxcopy() uint64_t aborted = 0; SKIP_IF(!have_htm()); + SKIP_IF(!is_ppc64le()); fd = mkstemp(tmpfile); assert(fd >= 0); -- cgit From d97e7f198b10ea9d17ea64606f18102260870782 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 22:24:59 +1000 Subject: selftests/powerpc: Give some tests longer to run Some of these long running tests can time out on heavily loaded systems, give them longer to run. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/benchmarks/futex_bench.c | 1 + tools/testing/selftests/powerpc/benchmarks/mmap_bench.c | 2 ++ tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c | 1 + tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c | 1 + tools/testing/selftests/powerpc/stringloops/memcmp.c | 1 + 5 files changed, 6 insertions(+) diff --git a/tools/testing/selftests/powerpc/benchmarks/futex_bench.c b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c index 2fc711d9150d..d58e4dc50fcd 100644 --- a/tools/testing/selftests/powerpc/benchmarks/futex_bench.c +++ b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c @@ -38,5 +38,6 @@ int test_futex(void) int main(void) { + test_harness_set_timeout(300); return test_harness(test_futex, "futex_bench"); } diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c index 7a0a462a2272..033de0560d99 100644 --- a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c +++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c @@ -84,5 +84,7 @@ int main(int argc, char *argv[]) exit(1); } } + + test_harness_set_timeout(300); return test_harness(test_mmap, "mmap_bench"); } diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c index ae9a79086111..35a3426e341c 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c @@ -162,5 +162,6 @@ int instruction_count(void) int main(void) { + test_harness_set_timeout(300); return test_harness(instruction_count, "instruction_count"); } diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c index eb8acb78bc6c..2ed7ad33f7a3 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c @@ -98,5 +98,6 @@ static int lost_exception(void) int main(void) { + test_harness_set_timeout(300); return test_harness(lost_exception, "lost_exception"); } diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c index b5cf71727b2d..b1fa7546957f 100644 --- a/tools/testing/selftests/powerpc/stringloops/memcmp.c +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -154,5 +154,6 @@ static int testcases(void) int main(void) { + test_harness_set_timeout(300); return test_harness(testcases, "memcmp"); } -- cgit From 5a6099346c41792f1ba23aea6f74ad816e7fecd4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 Jul 2018 23:58:06 +1000 Subject: powerpc/64s/radix: tlb do not flush on page size when fullmm When the mm is being torn down there will be a full PID flush so there is no need to flush the TLB on page size changes. Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/tlb.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 6d2ba7c779dc..97ecef697e1b 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -49,6 +49,9 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, unsigned int page_size) { + if (tlb->fullmm) + return; + if (!tlb->page_size) tlb->page_size = page_size; else if (tlb->page_size != page_size) { -- cgit From 34c604d27590fdc9a2c944be8c50ae1fc80f5f25 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 25 Jul 2018 19:54:28 +1000 Subject: powerpc/64s: free page table caches at exit_mmap time The kernel page table caches are tied to init_mm, so there is no more need for them after userspace is finished. destroy_context() gets called when we drop the last reference for an mm, which can be much later than the task exit due to other lazy mm references to it. We can free the page table cache pages on task exit because they only cache the userspace page tables and kernel threads should not access user space addresses. The mapping for kernel threads itself is maintained in init_mm and page table cache for that is attached to init_mm. Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V [mpe: Merge change log additions from Aneesh] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_book3s64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 39e9ef0eb78b..8b24168ea8c4 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -221,7 +221,7 @@ static void pmd_frag_destroy(void *pmd_frag) } } -static void destroy_pagetable_page(struct mm_struct *mm) +static void destroy_pagetable_cache(struct mm_struct *mm) { void *frag; @@ -244,13 +244,14 @@ void destroy_context(struct mm_struct *mm) WARN_ON(process_tb[mm->context.id].prtb0 != 0); else subpage_prot_free(mm); - destroy_pagetable_page(mm); destroy_contexts(&mm->context); mm->context.id = MMU_NO_CONTEXT; } void arch_exit_mmap(struct mm_struct *mm) { + destroy_pagetable_cache(mm); + if (radix_enabled()) { /* * Radix doesn't have a valid bit in the process table -- cgit From dbc5740247961d6b060737620520dc5435926ffe Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 11 Apr 2018 15:18:00 +1000 Subject: powerpc/xive: Remove now useless pr_debug statements Those overly verbose statement in the setup of the pool VP aren't particularly useful (esp. considering we don't actually use the pool, we configure it bcs HW requires it only). So remove them which improves the code readability. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xive/native.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 39ab5ad58297..5b20a678d755 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -395,7 +395,6 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) /* Enable the pool VP */ vp = xive_pool_vps + cpu; - pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp); for (;;) { rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0); if (rc != OPAL_BUSY) @@ -415,16 +414,9 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) } vp_cam = be64_to_cpu(vp_cam_be); - pr_debug("VP CAM = %llx\n", vp_cam); - /* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */ - pr_debug("(Old HW value: %08x)\n", - in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2)); out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff); - out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2, - TM_QW2W2_VP | vp_cam); - pr_debug("(New HW value: %08x)\n", - in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2)); + out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2, TM_QW2W2_VP | vp_cam); } static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc) -- cgit From e27e0a94651e30912443e88642e698240e01cd57 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 11 Apr 2018 15:18:01 +1000 Subject: powerpc/xive: Remove xive_kexec_teardown_cpu() It's identical to xive_teardown_cpu() so just use the latter Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xive.h | 1 - arch/powerpc/platforms/powernv/setup.c | 2 +- arch/powerpc/platforms/pseries/kexec.c | 2 +- arch/powerpc/sysdev/xive/common.c | 22 ---------------------- 4 files changed, 2 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 8d1a2792484f..3c704f5dd3ae 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -87,7 +87,6 @@ extern int xive_smp_prepare_cpu(unsigned int cpu); extern void xive_smp_setup_cpu(void); extern void xive_smp_disable_cpu(void); extern void xive_teardown_cpu(void); -extern void xive_kexec_teardown_cpu(int secondary); extern void xive_shutdown(void); extern void xive_flush_interrupt(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index f96df0a25d05..7b09f66ee8c6 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -314,7 +314,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) u64 reinit_flags; if (xive_enabled()) - xive_kexec_teardown_cpu(secondary); + xive_teardown_cpu(); else xics_kexec_teardown_cpu(secondary); diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 46fbaef69a59..23f54223ed56 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -58,7 +58,7 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } if (xive_enabled()) { - xive_kexec_teardown_cpu(secondary); + xive_teardown_cpu(); if (!secondary) xive_shutdown(); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 4758173df426..e8f5b0551095 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1408,28 +1408,6 @@ void xive_teardown_cpu(void) xive_cleanup_cpu_queues(cpu, xc); } -void xive_kexec_teardown_cpu(int secondary) -{ - struct xive_cpu *xc = __this_cpu_read(xive_cpu); - unsigned int cpu = smp_processor_id(); - - /* Set CPPR to 0 to disable flow of interrupts */ - xc->cppr = 0; - out_8(xive_tima + xive_tima_offset + TM_CPPR, 0); - - /* Backend cleanup if any */ - if (xive_ops->teardown_cpu) - xive_ops->teardown_cpu(cpu, xc); - -#ifdef CONFIG_SMP - /* Get rid of IPI */ - xive_cleanup_cpu_ipi(cpu, xc); -#endif - - /* Disable and free the queues */ - xive_cleanup_cpu_queues(cpu, xc); -} - void xive_shutdown(void) { xive_ops->shutdown(); -- cgit From 74e96bf44f430cf7a01de19ba6cf49b361cdfd6e Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 4 Jul 2018 23:27:02 +0530 Subject: powerpc/pseries: Avoid using the size greater than RTAS_ERROR_LOG_MAX. The global mce data buffer that used to copy rtas error log is of 2048 (RTAS_ERROR_LOG_MAX) bytes in size. Before the copy we read extended_log_length from rtas error log header, then use max of extended_log_length and RTAS_ERROR_LOG_MAX as a size of data to be copied. Ideally the platform (phyp) will never send extended error log with size > 2048. But if that happens, then we have a risk of buffer overrun and corruption. Fix this by using min_t instead. Fixes: d368514c3097 ("powerpc: Fix corruption when grabbing FWNMI data") Reported-by: Michal Suchanek Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 5e1ef9150182..ef104144d4bc 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -371,7 +371,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) int len, error_log_length; error_log_length = 8 + rtas_error_extended_log_length(h); - len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX); + len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX); memcpy(global_mce_data_buf, h, len); errhdr = (struct rtas_error_log *)global_mce_data_buf; -- cgit From 94675cceacaec27a30eefb142c4c59a9d3131742 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 4 Jul 2018 23:27:21 +0530 Subject: powerpc/pseries: Defer the logging of rtas error to irq work queue. rtas_log_buf is a buffer to hold RTAS event data that are communicated to kernel by hypervisor. This buffer is then used to pass RTAS event data to user through proc fs. This buffer is allocated from vmalloc (non-linear mapping) area. On Machine check interrupt, register r3 points to RTAS extended event log passed by hypervisor that contains the MCE event. The pseries machine check handler then logs this error into rtas_log_buf. The rtas_log_buf is a vmalloc-ed (non-linear) buffer we end up taking up a page fault (vector 0x300) while accessing it. Since machine check interrupt handler runs in NMI context we can not afford to take any page fault. Page faults are not honored in NMI context and causes kernel panic. Apart from that, as Nick pointed out, pSeries_log_error() also takes a spin_lock while logging error which is not safe in NMI context. It may endup in deadlock if we get another MCE before releasing the lock. Fix this by deferring the logging of rtas error to irq work queue. Current implementation uses two different buffers to hold rtas error log depending on whether extended log is provided or not. This makes bit difficult to identify which buffer has valid data that needs to logged later in irq work. Simplify this using single buffer, one per paca, and copy rtas log to it irrespective of whether extended log is provided or not. Allocate this buffer below RMA region so that it can be accessed in real mode mce handler. Fixes: b96672dd840f ("powerpc: Machine check interrupt is a non-maskable interrupt") Cc: stable@vger.kernel.org # v4.14+ Reviewed-by: Nicholas Piggin Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/paca.h | 3 +++ arch/powerpc/platforms/pseries/ras.c | 47 +++++++++++++++++++++++----------- arch/powerpc/platforms/pseries/setup.c | 16 ++++++++++++ 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 4e9cede5a7e7..ad4f16164619 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -247,6 +247,9 @@ struct paca_struct { void *rfi_flush_fallback_area; u64 l1d_flush_size; #endif +#ifdef CONFIG_PPC_PSERIES + u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */ +#endif /* CONFIG_PPC_PSERIES */ } ____cacheline_aligned; extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index ef104144d4bc..14a46b07ab2f 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -32,11 +33,13 @@ static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; static DEFINE_SPINLOCK(ras_log_buf_lock); -static char global_mce_data_buf[RTAS_ERROR_LOG_MAX]; -static DEFINE_PER_CPU(__u64, mce_data_buf); - static int ras_check_exception_token; +static void mce_process_errlog_event(struct irq_work *work); +static struct irq_work mce_errlog_process_work = { + .func = mce_process_errlog_event, +}; + #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 @@ -330,16 +333,20 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) +static inline struct rtas_error_log *fwnmi_get_errlog(void) +{ + return (struct rtas_error_log *)local_paca->mce_data_buf; +} + /* * Get the error information for errors coming through the * FWNMI vectors. The pt_regs' r3 will be updated to reflect * the actual r3 if possible, and a ptr to the error log entry * will be returned if found. * - * If the RTAS error is not of the extended type, then we put it in a per - * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf. + * Use one buffer mce_data_buf per cpu to store RTAS error. * - * The global_mce_data_buf does not have any locks or protection around it, + * The mce_data_buf does not have any locks or protection around it, * if a second machine check comes in, or a system reset is done * before we have logged the error, then we will get corruption in the * error log. This is preferable over holding off on calling @@ -349,7 +356,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) { unsigned long *savep; - struct rtas_error_log *h, *errhdr = NULL; + struct rtas_error_log *h; /* Mask top two bits */ regs->gpr[3] &= ~(0x3UL << 62); @@ -362,22 +369,20 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) savep = __va(regs->gpr[3]); regs->gpr[3] = savep[0]; /* restore original r3 */ - /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; + /* Use the per cpu buffer from paca to store rtas error log */ + memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); if (!rtas_error_extended(h)) { - memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); + memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); } else { int len, error_log_length; error_log_length = 8 + rtas_error_extended_log_length(h); len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); - memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX); - memcpy(global_mce_data_buf, h, len); - errhdr = (struct rtas_error_log *)global_mce_data_buf; + memcpy(local_paca->mce_data_buf, h, len); } - return errhdr; + return (struct rtas_error_log *)local_paca->mce_data_buf; } /* Call this when done with the data returned by FWNMI_get_errinfo. @@ -422,6 +427,17 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset */ } +/* + * Process MCE rtas errlog event. + */ +static void mce_process_errlog_event(struct irq_work *work) +{ + struct rtas_error_log *err; + + err = fwnmi_get_errlog(); + log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); +} + /* * See if we can recover from a machine check exception. * This is only called on power4 (or above) and only via @@ -466,7 +482,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) recovered = 1; } - log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); + /* Queue irq work to log this rtas event later. */ + irq_work_queue(&mce_errlog_process_work); return recovered; } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 9948ad16f788..b411a74b861d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -102,6 +103,9 @@ static void pSeries_show_cpuinfo(struct seq_file *m) static void __init fwnmi_init(void) { unsigned long system_reset_addr, machine_check_addr; + u8 *mce_data_buf; + unsigned int i; + int nr_cpus = num_possible_cpus(); int ibm_nmi_register = rtas_token("ibm,nmi-register"); if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) @@ -115,6 +119,18 @@ static void __init fwnmi_init(void) if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr, machine_check_addr)) fwnmi_active = 1; + + /* + * Allocate a chunk for per cpu buffer to hold rtas errorlog. + * It will be used in real mode mce handler, hence it needs to be + * below RMA. + */ + mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus, + RTAS_ERROR_LOG_MAX, ppc64_rma_size)); + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_data_buf = mce_data_buf + + (RTAS_ERROR_LOG_MAX * i); + } } static void pseries_8259_cascade(struct irq_desc *desc) -- cgit From 1bb07b593adc1934a526eb04acfe8bf786decfd2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Aug 2018 09:01:10 +0000 Subject: selftests/powerpc: Add test for 32 bits memcmp This patch renames memcmp test to memcmp_64 and adds a memcmp_32 test for testing the 32 bits version of memcmp() Signed-off-by: Christophe Leroy [mpe: Fix 64-bit build by adding build_32bit test] Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/stringloops/Makefile | 20 +++++++++++++++++--- .../selftests/powerpc/stringloops/memcmp_32.S | 1 + 2 files changed, 18 insertions(+), 3 deletions(-) create mode 120000 tools/testing/selftests/powerpc/stringloops/memcmp_32.S diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index c60c6172dd3c..b682be18dc66 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -1,10 +1,24 @@ # SPDX-License-Identifier: GPL-2.0 # The loops are all 64-bit code -CFLAGS += -m64 -maltivec CFLAGS += -I$(CURDIR) -TEST_GEN_PROGS := memcmp -EXTRA_SOURCES := memcmp_64.S ../harness.c +EXTRA_SOURCES := ../harness.c + +build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi) + +TEST_GEN_PROGS := memcmp_64 + +$(OUTPUT)/memcmp_64: memcmp.c +$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec + +ifeq ($(build_32bit),1) +$(OUTPUT)/memcmp_32: memcmp.c +$(OUTPUT)/memcmp_32: CFLAGS += -m32 + +TEST_GEN_PROGS += memcmp_32 +endif + +ASFLAGS = $(CFLAGS) include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_32.S b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S new file mode 120000 index 000000000000..056f2b3af789 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/memcmp_32.S \ No newline at end of file -- cgit From f0abbfd89fed4abd8301b35fbf65a26d85b16e7f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Aug 2018 09:01:12 +0000 Subject: selftests/powerpc: Add test for strlen() This patch adds a test for strlen() string.c contains a copy of strlen() from lib/string.c The test first tests the correctness of strlen() by comparing the result with libc strlen(). It tests all cases of alignment. It them tests the duration of an aligned strlen() on a 4 bytes string, on a 16 bytes string and on a 256 bytes string. Signed-off-by: Christophe Leroy [mpe: Drop change log from copy of string.c] Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/stringloops/Makefile | 4 +- .../testing/selftests/powerpc/stringloops/string.c | 21 ++++ .../testing/selftests/powerpc/stringloops/strlen.c | 127 +++++++++++++++++++++ 3 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/stringloops/string.c create mode 100644 tools/testing/selftests/powerpc/stringloops/strlen.c diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index b682be18dc66..31499587b483 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -6,7 +6,7 @@ EXTRA_SOURCES := ../harness.c build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi) -TEST_GEN_PROGS := memcmp_64 +TEST_GEN_PROGS := memcmp_64 strlen $(OUTPUT)/memcmp_64: memcmp.c $(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec @@ -18,6 +18,8 @@ $(OUTPUT)/memcmp_32: CFLAGS += -m32 TEST_GEN_PROGS += memcmp_32 endif +$(OUTPUT)/strlen: strlen.c string.c + ASFLAGS = $(CFLAGS) include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/stringloops/string.c b/tools/testing/selftests/powerpc/stringloops/string.c new file mode 100644 index 000000000000..45e7775415c7 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/string.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copied from linux/lib/string.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include + +/** + * strlen - Find the length of a string + * @s: The string to be sized + */ +size_t test_strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} diff --git a/tools/testing/selftests/powerpc/stringloops/strlen.c b/tools/testing/selftests/powerpc/stringloops/strlen.c new file mode 100644 index 000000000000..9055ebc484d0 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/strlen.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "utils.h" + +#define SIZE 256 +#define ITERATIONS 1000 +#define ITERATIONS_BENCH 100000 + +int test_strlen(const void *s); + +/* test all offsets and lengths */ +static void test_one(char *s) +{ + unsigned long offset; + + for (offset = 0; offset < SIZE; offset++) { + int x, y; + unsigned long i; + + y = strlen(s + offset); + x = test_strlen(s + offset); + + if (x != y) { + printf("strlen() returned %d, should have returned %d (%p offset %ld)\n", x, y, s, offset); + + for (i = offset; i < SIZE; i++) + printf("%02x ", s[i]); + printf("\n"); + } + } +} + +static void bench_test(char *s) +{ + struct timespec ts_start, ts_end; + int i; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + + for (i = 0; i < ITERATIONS_BENCH; i++) + test_strlen(s); + + clock_gettime(CLOCK_MONOTONIC, &ts_end); + + printf("len %3.3d : time = %.6f\n", test_strlen(s), ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9); +} + +static int testcase(void) +{ + char *s; + unsigned long i; + + s = memalign(128, SIZE); + if (!s) { + perror("memalign"); + exit(1); + } + + srandom(1); + + memset(s, 0, SIZE); + for (i = 0; i < SIZE; i++) { + char c; + + do { + c = random() & 0x7f; + } while (!c); + s[i] = c; + test_one(s); + } + + for (i = 0; i < ITERATIONS; i++) { + unsigned long j; + + for (j = 0; j < SIZE; j++) { + char c; + + do { + c = random() & 0x7f; + } while (!c); + s[j] = c; + } + for (j = 0; j < sizeof(long); j++) { + s[SIZE - 1 - j] = 0; + test_one(s); + } + } + + for (i = 0; i < SIZE; i++) { + char c; + + do { + c = random() & 0x7f; + } while (!c); + s[i] = c; + } + + bench_test(s); + + s[16] = 0; + bench_test(s); + + s[8] = 0; + bench_test(s); + + s[4] = 0; + bench_test(s); + + s[3] = 0; + bench_test(s); + + s[2] = 0; + bench_test(s); + + s[1] = 0; + bench_test(s); + + return 0; +} + +int main(void) +{ + return test_harness(testcase, "strlen"); +} -- cgit From 9412b234501eef1d273cc759387a232f0027c574 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Aug 2018 09:01:14 +0000 Subject: powerpc/lib: Implement strlen() in assembly for PPC32 The generic implementation of strlen() reads strings byte per byte. This patch implements strlen() in assembly based on a read of entire words, in the same spirit as what some other arches and glibc do. On a 8xx the time spent in strlen is reduced by 3/4 for long strings. strlen() selftest on an 8xx provides the following values: Before the patch (ie with the generic strlen() in lib/string.c): len 256 : time = 1.195055 len 016 : time = 0.083745 len 008 : time = 0.046828 len 004 : time = 0.028390 After the patch: len 256 : time = 0.272185 ==> 78% improvment len 016 : time = 0.040632 ==> 51% improvment len 008 : time = 0.033060 ==> 29% improvment len 004 : time = 0.029149 ==> 2% degradation On a 832x: Before the patch: len 256 : time = 0.236125 len 016 : time = 0.018136 len 008 : time = 0.011000 len 004 : time = 0.007229 After the patch: len 256 : time = 0.094950 ==> 60% improvment len 016 : time = 0.013357 ==> 26% improvment len 008 : time = 0.010586 ==> 4% improvment len 004 : time = 0.008784 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/string.h | 2 + arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/strlen_32.S | 78 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/lib/strlen_32.S diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 9b8cedf618f4..1647de15a31e 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -50,6 +50,8 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) return __memset64(p, v, n * 8); } #else +#define __HAVE_ARCH_STRLEN + extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index d0ca13ad8231..670286808928 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -12,7 +12,7 @@ CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) obj-y += string.o alloc.o code-patching.o feature-fixups.o -obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o +obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o # See corresponding test in arch/powerpc/Makefile # 64-bit linker creates .sfpr on demand for final link (vmlinux), diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S new file mode 100644 index 000000000000..0a8d3f64d493 --- /dev/null +++ b/arch/powerpc/lib/strlen_32.S @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * strlen() for PPC32 + * + * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information. + * + * Inspired from glibc implementation + */ +#include +#include +#include + + .text + +/* + * Algorithm: + * + * 1) Given a word 'x', we can test to see if it contains any 0 bytes + * by subtracting 0x01010101, and seeing if any of the high bits of each + * byte changed from 0 to 1. This works because the least significant + * 0 byte must have had no incoming carry (otherwise it's not the least + * significant), so it is 0x00 - 0x01 == 0xff. For all other + * byte values, either they have the high bit set initially, or when + * 1 is subtracted you get a value in the range 0x00-0x7f, none of which + * have their high bit set. The expression here is + * (x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when + * there were no 0x00 bytes in the word. You get 0x80 in bytes that + * match, but possibly false 0x80 matches in the next more significant + * byte to a true match due to carries. For little-endian this is + * of no consequence since the least significant match is the one + * we're interested in, but big-endian needs method 2 to find which + * byte matches. + * 2) Given a word 'x', we can test to see _which_ byte was zero by + * calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080). + * This produces 0x80 in each byte that was zero, and 0x00 in all + * the other bytes. The '| ~0x80808080' clears the low 7 bits in each + * byte, and the '| x' part ensures that bytes with the high bit set + * produce 0x00. The addition will carry into the high bit of each byte + * iff that byte had one of its low 7 bits set. We can then just see + * which was the most significant bit set and divide by 8 to find how + * many to add to the index. + * This is from the book 'The PowerPC Compiler Writer's Guide', + * by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. + */ + +_GLOBAL(strlen) + andi. r0, r3, 3 + lis r7, 0x0101 + addi r10, r3, -4 + addic r7, r7, 0x0101 /* r7 = 0x01010101 (lomagic) & clear XER[CA] */ + rotlwi r6, r7, 31 /* r6 = 0x80808080 (himagic) */ + bne- 3f + .balign IFETCH_ALIGN_BYTES +1: lwzu r9, 4(r10) +2: subf r8, r7, r9 + and. r8, r8, r6 + beq+ 1b + andc. r8, r8, r9 + beq+ 1b + andc r8, r9, r6 + orc r9, r9, r6 + subfe r8, r6, r8 + nor r8, r8, r9 + cntlzw r8, r8 + subf r3, r3, r10 + srwi r8, r8, 3 + add r3, r3, r8 + blr + + /* Missaligned string: make sure bytes before string are seen not 0 */ +3: xor r10, r10, r0 + orc r8, r8, r8 + lwzu r9, 4(r10) + slwi r0, r0, 3 + srw r8, r8, r0 + orc r9, r9, r8 + b 2b +EXPORT_SYMBOL(strlen) -- cgit From 396ab6ab284a9fff4154e9bd491372f43de8284c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Aug 2018 09:01:16 +0000 Subject: selftests/powerpc: Update strlen() test to test the new assembly function for PPC32 This patch adds a test for testing the new assembly strlen() for PPC32 Signed-off-by: Christophe Leroy [mpe: Fix 64-bit build] Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/stringloops/Makefile | 7 +++++++ tools/testing/selftests/powerpc/stringloops/asm/cache.h | 1 + tools/testing/selftests/powerpc/stringloops/strlen_32.S | 1 + 3 files changed, 9 insertions(+) create mode 100644 tools/testing/selftests/powerpc/stringloops/asm/cache.h create mode 120000 tools/testing/selftests/powerpc/stringloops/strlen_32.S diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 31499587b483..10b35c87a4f4 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -20,6 +20,13 @@ endif $(OUTPUT)/strlen: strlen.c string.c +ifeq ($(build_32bit),1) +$(OUTPUT)/strlen_32: strlen.c +$(OUTPUT)/strlen_32: CFLAGS += -m32 + +TEST_GEN_PROGS += strlen_32 +endif + ASFLAGS = $(CFLAGS) include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/stringloops/asm/cache.h b/tools/testing/selftests/powerpc/stringloops/asm/cache.h new file mode 100644 index 000000000000..8a2840831122 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/asm/cache.h @@ -0,0 +1 @@ +#define IFETCH_ALIGN_BYTES 4 diff --git a/tools/testing/selftests/powerpc/stringloops/strlen_32.S b/tools/testing/selftests/powerpc/stringloops/strlen_32.S new file mode 120000 index 000000000000..72b13731b24c --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/strlen_32.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/strlen_32.S \ No newline at end of file -- cgit From 250a93501d6265bbb9ddf06af25ac9ae64782297 Mon Sep 17 00:00:00 2001 From: Darren Stevens Date: Wed, 25 Jul 2018 21:55:18 +0100 Subject: powerpc/pasemi: Search for PCI root bus by compatible property Pasemi arch code finds the root of the PCI-e bus by searching the device-tree for a node called 'pxp'. But the root bus has a compatible property of 'pasemi,rootbus' so search for that instead. Signed-off-by: Darren Stevens Acked-by: Olof Johansson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pasemi/pci.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index aea9ff2c8e6d..3e3e807d5b6b 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -216,6 +216,7 @@ static int __init pas_add_bridge(struct device_node *dev) void __init pas_pci_init(void) { struct device_node *np, *root; + int res; root = of_find_node_by_path("/"); if (!root) { @@ -226,11 +227,11 @@ void __init pas_pci_init(void) pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS); - for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) - if (np->name && !strcmp(np->name, "pxp") && !pas_add_bridge(np)) - of_node_get(np); - - of_node_put(root); + np = of_find_compatible_node(root, NULL, "pasemi,rootbus"); + if (np) { + res = pas_add_bridge(np); + of_node_put(np); + } } void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) -- cgit From 4a7a0a8444ba4cebb3a6744e9c14fc6391d7f266 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:02 +1000 Subject: powerpc/64s: Move SET_SCRATCH0() into EXCEPTION_PROLOG_PSERIES() EXCEPTION_PROLOG_PSERIES() only has two users, STD_EXCEPTION_PSERIES() and STD_EXCEPTION_HV() both of which "call" SET_SCRATCH0(), so just move SET_SCRATCH0() into EXCEPTION_PROLOG_PSERIES(). Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 1f2efc1a9769..311f6a753791 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -344,6 +344,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) __EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) #define EXCEPTION_PROLOG_PSERIES(area, label, h, extra, vec) \ + SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, h); @@ -552,7 +553,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) * Exception vectors. */ #define STD_EXCEPTION_PSERIES(vec, label) \ - SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \ EXC_STD, KVMTEST_PR, vec); \ @@ -567,7 +567,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) #define STD_EXCEPTION_HV(loc, vec, label) \ - SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \ EXC_HV, KVMTEST_HV, vec); -- cgit From 92b6d65c079450b7eb6a59ceb4d9855640914007 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:03 +1000 Subject: powerpc/64s: Move SET_SCRATCH0() into EXCEPTION_RELON_PROLOG_PSERIES() EXCEPTION_RELON_PROLOG_PSERIES() only has two users, STD_RELON_EXCEPTION_PSERIES() and STD_RELON_EXCEPTION_HV() both of which "call" SET_SCRATCH0(), so just move SET_SCRATCH0() into EXCEPTION_RELON_PROLOG_PSERIES(). Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 311f6a753791..138ea976f730 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -183,6 +183,7 @@ * case EXCEPTION_RELON_PROLOG_PSERIES_1 will be using lr. */ #define EXCEPTION_RELON_PROLOG_PSERIES(area, label, h, extra, vec) \ + SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) @@ -576,7 +577,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define STD_RELON_EXCEPTION_PSERIES(loc, vec, label) \ /* No guest interrupts come through here */ \ - SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_STD, NOTEST, vec); #define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ @@ -584,7 +584,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD) #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ - SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, \ EXC_HV, KVMTEST_HV, vec); -- cgit From e899fce509f527e7e8efd5dd8fff22fcc0c05013 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:04 +1000 Subject: powerpc/64s: Rename STD_EXCEPTION_PSERIES to STD_EXCEPTION The "PSERIES" in STD_EXCEPTION_PSERIES is to differentiate the macros from the legacy iSeries versions, which are called STD_EXCEPTION_ISERIES. It is not anything to do with pseries vs powernv or powermac etc. We removed the legacy iSeries code in 2012, in commit 8ee3e0d69623x ("powerpc: Remove the main legacy iSerie platform code"). So remove "PSERIES" from the macros. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 2 +- arch/powerpc/include/asm/head-64.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 138ea976f730..3ff8473aa063 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -553,7 +553,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) /* * Exception vectors. */ -#define STD_EXCEPTION_PSERIES(vec, label) \ +#define STD_EXCEPTION(vec, label) \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \ EXC_STD, KVMTEST_PR, vec); \ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 7e0e93f24cb7..66ea28dc21dd 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -260,7 +260,7 @@ name: #define EXC_REAL(name, start, size) \ EXC_REAL_BEGIN(name, start, size); \ - STD_EXCEPTION_PSERIES(start, name##_common); \ + STD_EXCEPTION(start, name##_common); \ EXC_REAL_END(name, start, size); #define EXC_VIRT(name, start, size, realvec) \ -- cgit From 75e8bef3d6b5c8d25d56f0c0b804ec97e9736645 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:05 +1000 Subject: powerpc/64s: Rename STD_EXCEPTION_PSERIES_OOL to STD_EXCEPTION_OOL Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 2 +- arch/powerpc/include/asm/head-64.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 3ff8473aa063..ff51aa504895 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -563,7 +563,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_0(PACA_EXGEN) \ b hdlr; -#define STD_EXCEPTION_PSERIES_OOL(vec, label) \ +#define STD_EXCEPTION_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 66ea28dc21dd..3e955889f615 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -295,7 +295,7 @@ name: #define __TRAMP_REAL_OOL(name, vec) \ TRAMP_REAL_BEGIN(tramp_real_##name); \ - STD_EXCEPTION_PSERIES_OOL(vec, name##_common); \ + STD_EXCEPTION_OOL(vec, name##_common); #define EXC_REAL_OOL(name, start, size) \ __EXC_REAL_OOL(name, start, size); \ -- cgit From e42389c5f19fed0a7c578769b00edc2cf23ee319 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:06 +1000 Subject: powerpc/64s: Rename STD_RELON_EXCEPTION_PSERIES to STD_RELON_EXCEPTION Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 2 +- arch/powerpc/include/asm/head-64.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index ff51aa504895..c991a6138b82 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -575,7 +575,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) -#define STD_RELON_EXCEPTION_PSERIES(loc, vec, label) \ +#define STD_RELON_EXCEPTION(loc, vec, label) \ /* No guest interrupts come through here */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_STD, NOTEST, vec); diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 3e955889f615..cf0e1ac1149a 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -265,7 +265,7 @@ name: #define EXC_VIRT(name, start, size, realvec) \ EXC_VIRT_BEGIN(name, start, size); \ - STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ + STD_RELON_EXCEPTION(start, realvec, name##_common); \ EXC_VIRT_END(name, start, size); #define EXC_REAL_MASKABLE(name, start, size, bitmask) \ -- cgit From b706f42362c3601271dd0fcd4fd9acb45fe7cd86 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:07 +1000 Subject: powerpc/64s: Rename STD_RELON_EXCEPTION_PSERIES_OOL to STD_RELON_EXCEPTION_OOL Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 2 +- arch/powerpc/include/asm/head-64.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index c991a6138b82..542000850758 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -579,7 +579,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) /* No guest interrupts come through here */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_STD, NOTEST, vec); -#define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ +#define STD_RELON_EXCEPTION_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD) diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index cf0e1ac1149a..640d354a3ab3 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -346,7 +346,7 @@ name: #define __TRAMP_VIRT_OOL(name, realvec) \ TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - STD_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \ + STD_RELON_EXCEPTION_OOL(realvec, name##_common); #define EXC_VIRT_OOL(name, start, size, realvec) \ __EXC_VIRT_OOL(name, start, size); \ -- cgit From cb58a4a4b370b6e92e3f2654b2468a565fee9d87 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:08 +1000 Subject: powerpc/64s: Rename EXCEPTION_PROLOG_PSERIES_1 to EXCEPTION_PROLOG_2 As with the other patches in this series, we are removing the "PSERIES" from the name as it's no longer meaningful. In this case it's not simply a case of removing the "PSERIES" as that would result in a clash with the existing EXCEPTION_PROLOG_1. Instead we name this one EXCEPTION_PROLOG_2, as it's usually used in sequence after 0 and 1. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 20 ++++++++++---------- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 542000850758..0d798d627b4b 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -317,7 +317,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_1(area, extra, vec) \ _EXCEPTION_PROLOG_1(area, extra, vec) -#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \ +#define __EXCEPTION_PROLOG_2(label, h) \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label) \ @@ -326,8 +326,8 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR1,r10; \ h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ -#define EXCEPTION_PROLOG_PSERIES_1(label, h) \ - __EXCEPTION_PROLOG_PSERIES_1(label, h) +#define EXCEPTION_PROLOG_2(label, h) \ + __EXCEPTION_PROLOG_2(label, h) /* _NORI variant keeps MSR_RI clear */ #define __EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ @@ -348,7 +348,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_PSERIES_1(label, h); + EXCEPTION_PROLOG_2(label, h); #define __KVMTEST(h, n) \ lbz r10,HSTATE_IN_GUEST(r13); \ @@ -565,7 +565,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define STD_EXCEPTION_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ - EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) + EXCEPTION_PROLOG_2(label, EXC_STD) #define STD_EXCEPTION_HV(loc, vec, label) \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \ @@ -573,7 +573,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define STD_EXCEPTION_HV_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ - EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) + EXCEPTION_PROLOG_2(label, EXC_HV) #define STD_RELON_EXCEPTION(loc, vec, label) \ /* No guest interrupts come through here */ \ @@ -630,7 +630,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ - EXCEPTION_PROLOG_PSERIES_1(label, h); + EXCEPTION_PROLOG_2(label, h); #define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) @@ -641,7 +641,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ - EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) + EXCEPTION_PROLOG_2(label, EXC_STD) #define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ @@ -649,7 +649,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ - EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) + EXCEPTION_PROLOG_2(label, EXC_HV) #define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save r13 */ \ @@ -666,7 +666,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ - EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD); + EXCEPTION_PROLOG_2(label, EXC_STD); #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 7a672dafd94f..21cdea3f5a30 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1327,7 +1327,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) #endif KVMTEST_HV(0x1500) - EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) + EXCEPTION_PROLOG_2(denorm_common, EXC_HV) EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION @@ -1447,7 +1447,7 @@ EXC_VIRT_NONE(0x5800, 0x100) std r12,PACA_EXGEN+EX_R12(r13); \ GET_SCRATCH0(r10); \ std r10,PACA_EXGEN+EX_R13(r13); \ - EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H) + EXCEPTION_PROLOG_2(soft_nmi_common, _H) /* * Branch to soft_nmi_interrupt using the emergency stack. The emergency -- cgit From 94f3cc8e361c3e4a4689a7e5d4fe06bdb2c0961f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:09 +1000 Subject: powerpc/64s: Remove PSERIES from the NORI macros Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 10 +++++----- arch/powerpc/kernel/exceptions-64s.S | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 0d798d627b4b..d6f058b88e6d 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -330,7 +330,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) __EXCEPTION_PROLOG_2(label, h) /* _NORI variant keeps MSR_RI clear */ -#define __EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ +#define __EXCEPTION_PROLOG_2_NORI(label, h) \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ xori r10,r10,MSR_RI; /* Clear MSR_RI */ \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ @@ -341,8 +341,8 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ -#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ - __EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) +#define EXCEPTION_PROLOG_2_NORI(label, h) \ + __EXCEPTION_PROLOG_2_NORI(label, h) #define EXCEPTION_PROLOG_PSERIES(area, label, h, extra, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ @@ -419,10 +419,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #endif /* Do not enable RI */ -#define EXCEPTION_PROLOG_PSERIES_NORI(area, label, h, extra, vec) \ +#define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec) \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_PSERIES_1_NORI(label, h); + EXCEPTION_PROLOG_2_NORI(label, h); #define __KVM_HANDLER(area, h, n) \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 21cdea3f5a30..373f6ea823a8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -127,8 +127,8 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100) * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is * being used, so a nested NMI exception would corrupt it. */ - EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, EXC_STD, - IDLETEST, 0x100) + EXCEPTION_PROLOG_NORI(PACA_EXNMI, system_reset_common, EXC_STD, + IDLETEST, 0x100) EXC_REAL_END(system_reset, 0x100, 0x100) EXC_VIRT_NONE(0x4100, 0x100) @@ -231,8 +231,8 @@ EXC_COMMON_BEGIN(system_reset_common) TRAMP_REAL_BEGIN(system_reset_fwnmi) SET_SCRATCH0(r13) /* save r13 */ /* See comment at system_reset exception */ - EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, - EXC_STD, NOTEST, 0x100) + EXCEPTION_PROLOG_NORI(PACA_EXNMI, system_reset_common, EXC_STD, + NOTEST, 0x100) #endif /* CONFIG_PPC_PSERIES */ @@ -338,7 +338,7 @@ machine_check_pSeries_0: * nested machine check corrupts it. machine_check_common enables * MSR_RI. */ - EXCEPTION_PROLOG_PSERIES_1_NORI(machine_check_common, EXC_STD) + EXCEPTION_PROLOG_2_NORI(machine_check_common, EXC_STD) TRAMP_KVM_SKIP(PACA_EXMC, 0x200) -- cgit From 6ebb939740d5141358a42a98de516d179df0ad9e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:10 +1000 Subject: powerpc/64s: Rename EXCEPTION_RELON_PROLOG_PSERIES_1 The EXCEPTION_RELON_PROLOG_PSERIES_1() macro does the same job as EXCEPTION_PROLOG_2 (which we just recently created), except for "RELON" (relocation on) exceptions. So rename it as such. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index d6f058b88e6d..e63a3dc769f6 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -157,7 +157,7 @@ b hrfi_flush_fallback #ifdef CONFIG_RELOCATABLE -#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ +#define __EXCEPTION_PROLOG_2_RELON(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label); \ mtctr r12; \ @@ -167,26 +167,26 @@ bctr; #else /* If not relocatable, we can jump directly -- and save messing with LR */ -#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ +#define __EXCEPTION_PROLOG_2_RELON(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ li r10,MSR_RI; \ mtmsrd r10,1; /* Set RI (EE=0) */ \ b label; #endif -#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ - __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ +#define EXCEPTION_PROLOG_2_RELON(label, h) \ + __EXCEPTION_PROLOG_2_RELON(label, h) /* * As EXCEPTION_PROLOG_PSERIES(), except we've already got relocation on * so no need to rfid. Save lr in case we're CONFIG_RELOCATABLE, in which - * case EXCEPTION_RELON_PROLOG_PSERIES_1 will be using lr. + * case EXCEPTION_PROLOG_2_RELON will be using LR. */ #define EXCEPTION_RELON_PROLOG_PSERIES(area, label, h, extra, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) + EXCEPTION_PROLOG_2_RELON(label, h) /* * We're short on space and time in the exception prolog, so we can't @@ -581,7 +581,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define STD_RELON_EXCEPTION_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ - EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD) + EXCEPTION_PROLOG_2_RELON(label, EXC_STD) #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, \ @@ -589,7 +589,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ - EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) + EXCEPTION_PROLOG_2_RELON(label, EXC_HV) /* This associate vector numbers with bits in paca->irq_happened */ #define SOFTEN_VALUE_0x500 PACA_IRQ_EE @@ -655,7 +655,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ - EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) + EXCEPTION_PROLOG_2_RELON(label, h) #define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\ __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) @@ -674,7 +674,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ - EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) + EXCEPTION_PROLOG_2_RELON(label, EXC_HV) /* * Our exception common code can be passed various "additions" -- cgit From 270373f14f0fc55c3326eeab10e5468184a27ff1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:11 +1000 Subject: powerpc/64s: Rename EXCEPTION_RELON_PROLOG_PSERIES To just EXCEPTION_RELON_PROLOG(). Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index e63a3dc769f6..bbd548cce332 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -182,7 +182,7 @@ * so no need to rfid. Save lr in case we're CONFIG_RELOCATABLE, in which * case EXCEPTION_PROLOG_2_RELON will be using LR. */ -#define EXCEPTION_RELON_PROLOG_PSERIES(area, label, h, extra, vec) \ +#define EXCEPTION_RELON_PROLOG(area, label, h, extra, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ @@ -577,15 +577,14 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define STD_RELON_EXCEPTION(loc, vec, label) \ /* No guest interrupts come through here */ \ - EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_STD, NOTEST, vec); + EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_STD, NOTEST, vec); #define STD_RELON_EXCEPTION_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_PROLOG_2_RELON(label, EXC_STD) #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ - EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, \ - EXC_HV, KVMTEST_HV, vec); + EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_HV, KVMTEST_HV, vec); #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ -- cgit From bdf08e1da0590f16eb255cda20db2a77bfdb349b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:12 +1000 Subject: powerpc/64s: Rename EXCEPTION_PROLOG_PSERIES to EXCEPTION_PROLOG Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index bbd548cce332..38c34a982501 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -178,9 +178,9 @@ __EXCEPTION_PROLOG_2_RELON(label, h) /* - * As EXCEPTION_PROLOG_PSERIES(), except we've already got relocation on - * so no need to rfid. Save lr in case we're CONFIG_RELOCATABLE, in which - * case EXCEPTION_PROLOG_2_RELON will be using LR. + * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to + * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case + * EXCEPTION_PROLOG_2_RELON will be using LR. */ #define EXCEPTION_RELON_PROLOG(area, label, h, extra, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ @@ -344,7 +344,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_2_NORI(label, h) \ __EXCEPTION_PROLOG_2_NORI(label, h) -#define EXCEPTION_PROLOG_PSERIES(area, label, h, extra, vec) \ +#define EXCEPTION_PROLOG(area, label, h, extra, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ @@ -554,8 +554,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) * Exception vectors. */ #define STD_EXCEPTION(vec, label) \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \ - EXC_STD, KVMTEST_PR, vec); \ + EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_STD, KVMTEST_PR, vec); /* Version of above for when we have to branch out-of-line */ #define __OOL_EXCEPTION(vec, label, hdlr) \ @@ -568,8 +567,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_2(label, EXC_STD) #define STD_EXCEPTION_HV(loc, vec, label) \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \ - EXC_HV, KVMTEST_HV, vec); + EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_HV, KVMTEST_HV, vec); #define STD_EXCEPTION_HV_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ -- cgit From 9bf2877ac128631f80cc9e85bfdf4dbd08abb225 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:13 +1000 Subject: powerpc/64s: Drop _MASKABLE_EXCEPTION_PSERIES() _MASKABLE_EXCEPTION_PSERIES() does nothing useful, update all callers to use __MASKABLE_EXCEPTION_PSERIES() directly. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 11 ++++------- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 38c34a982501..ae048ff7a9cd 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -629,20 +629,17 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_2(label, h); -#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ - __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) - #define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ - _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_TEST_PR, bitmask) + __MASKABLE_EXCEPTION_PSERIES(vec, label, \ + EXC_STD, SOFTEN_TEST_PR, bitmask) #define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_2(label, EXC_STD) #define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \ - _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV, bitmask) + __MASKABLE_EXCEPTION_PSERIES(vec, label, \ + EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 373f6ea823a8..f0dc3460d9ec 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -768,11 +768,11 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) .globl hardware_interrupt_hv; hardware_interrupt_hv: BEGIN_FTR_SECTION - _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + __MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV, IRQS_DISABLED) FTR_SECTION_ELSE - _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + __MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR, IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -- cgit From 6adc6e9c0730d87953e1a0f15d7c23bce1e5d874 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:14 +1000 Subject: powerpc/64s: Drop _MASKABLE_RELON_EXCEPTION_PSERIES() _MASKABLE_RELON_EXCEPTION_PSERIES() does nothing useful, update all callers to use __MASKABLE_RELON_EXCEPTION_PSERIES() directly. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 11 ++++------- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index ae048ff7a9cd..52185dbede9a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -651,20 +651,17 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_2_RELON(label, h) -#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\ - __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) - #define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ - _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_NOTEST_PR, bitmask) + __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ + EXC_STD, SOFTEN_NOTEST_PR, bitmask) #define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_2(label, EXC_STD); #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \ - _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV, bitmask) + __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ + EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f0dc3460d9ec..d370d2005506 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -782,11 +782,11 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + __MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV, IRQS_DISABLED) FTR_SECTION_ELSE - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + __MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR, IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -- cgit From 0a55c2418500ba82a3a371b04471e3694e3ee168 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:15 +1000 Subject: powerpc/64s: Remove PSERIES naming from the MASKABLE macros Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 24 ++++++++++-------------- arch/powerpc/include/asm/head-64.h | 8 ++++---- arch/powerpc/kernel/exceptions-64s.S | 8 ++++---- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 52185dbede9a..b9d2b6985435 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -623,45 +623,41 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask) #define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask) -#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ +#define __MASKABLE_EXCEPTION(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_2(label, h); -#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ - __MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_TEST_PR, bitmask) +#define MASKABLE_EXCEPTION(loc, vec, label, bitmask) \ + __MASKABLE_EXCEPTION(vec, label, EXC_STD, SOFTEN_TEST_PR, bitmask) -#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ +#define MASKABLE_EXCEPTION_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_2(label, EXC_STD) #define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \ - __MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV, bitmask) + __MASKABLE_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ EXCEPTION_PROLOG_2(label, EXC_HV) -#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ +#define __MASKABLE_RELON_EXCEPTION(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_2_RELON(label, h) -#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ - __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_NOTEST_PR, bitmask) +#define MASKABLE_RELON_EXCEPTION(loc, vec, label, bitmask) \ + __MASKABLE_RELON_EXCEPTION(vec, label, EXC_STD, SOFTEN_NOTEST_PR, bitmask) -#define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ +#define MASKABLE_RELON_EXCEPTION_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_2(label, EXC_STD); #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \ - __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV, bitmask) + __MASKABLE_RELON_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 640d354a3ab3..5678f82cf355 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -270,12 +270,12 @@ name: #define EXC_REAL_MASKABLE(name, start, size, bitmask) \ EXC_REAL_BEGIN(name, start, size); \ - MASKABLE_EXCEPTION_PSERIES(start, start, name##_common, bitmask);\ + MASKABLE_EXCEPTION(start, start, name##_common, bitmask); \ EXC_REAL_END(name, start, size); #define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \ EXC_VIRT_BEGIN(name, start, size); \ - MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common, bitmask);\ + MASKABLE_RELON_EXCEPTION(start, realvec, name##_common, bitmask);\ EXC_VIRT_END(name, start, size); #define EXC_REAL_HV(name, start, size) \ @@ -306,7 +306,7 @@ name: #define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \ TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common, bitmask); \ + MASKABLE_EXCEPTION_OOL(vec, name##_common, bitmask); #define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \ __EXC_REAL_OOL_MASKABLE(name, start, size); \ @@ -357,7 +357,7 @@ name: #define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \ TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common, bitmask);\ + MASKABLE_RELON_EXCEPTION_OOL(realvec, name##_common, bitmask); #define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \ __EXC_VIRT_OOL_MASKABLE(name, start, size); \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d370d2005506..f8bf573cdf80 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -768,11 +768,11 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) .globl hardware_interrupt_hv; hardware_interrupt_hv: BEGIN_FTR_SECTION - __MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + __MASKABLE_EXCEPTION(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV, IRQS_DISABLED) FTR_SECTION_ELSE - __MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + __MASKABLE_EXCEPTION(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR, IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) @@ -782,11 +782,11 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION - __MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + __MASKABLE_RELON_EXCEPTION(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV, IRQS_DISABLED) FTR_SECTION_ELSE - __MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + __MASKABLE_RELON_EXCEPTION(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR, IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -- cgit From b536da7c2d7dfa015ea36e78185ad74e6d775c96 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:16 +1000 Subject: powerpc/64s: Drop unused loc parameter to MASKABLE_EXCEPTION macros We pass the "loc" (location) parameter to MASKABLE_EXCEPTION and friends, but it's not used, so drop it. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 8 ++++---- arch/powerpc/include/asm/head-64.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b9d2b6985435..a86feddddad0 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -629,14 +629,14 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_2(label, h); -#define MASKABLE_EXCEPTION(loc, vec, label, bitmask) \ +#define MASKABLE_EXCEPTION(vec, label, bitmask) \ __MASKABLE_EXCEPTION(vec, label, EXC_STD, SOFTEN_TEST_PR, bitmask) #define MASKABLE_EXCEPTION_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_2(label, EXC_STD) -#define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \ +#define MASKABLE_EXCEPTION_HV(vec, label, bitmask) \ __MASKABLE_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \ @@ -649,14 +649,14 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_2_RELON(label, h) -#define MASKABLE_RELON_EXCEPTION(loc, vec, label, bitmask) \ +#define MASKABLE_RELON_EXCEPTION(vec, label, bitmask) \ __MASKABLE_RELON_EXCEPTION(vec, label, EXC_STD, SOFTEN_NOTEST_PR, bitmask) #define MASKABLE_RELON_EXCEPTION_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_2(label, EXC_STD); -#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \ +#define MASKABLE_RELON_EXCEPTION_HV(vec, label, bitmask) \ __MASKABLE_RELON_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 5678f82cf355..a4f947888744 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -270,12 +270,12 @@ name: #define EXC_REAL_MASKABLE(name, start, size, bitmask) \ EXC_REAL_BEGIN(name, start, size); \ - MASKABLE_EXCEPTION(start, start, name##_common, bitmask); \ + MASKABLE_EXCEPTION(start, name##_common, bitmask); \ EXC_REAL_END(name, start, size); #define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \ EXC_VIRT_BEGIN(name, start, size); \ - MASKABLE_RELON_EXCEPTION(start, realvec, name##_common, bitmask);\ + MASKABLE_RELON_EXCEPTION(realvec, name##_common, bitmask); \ EXC_VIRT_END(name, start, size); #define EXC_REAL_HV(name, start, size) \ -- cgit From 0b924de4f650c9544cd3d0a8228145a622a37630 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 23:07:17 +1000 Subject: powerpc/64s: Don't use __MASKABLE_EXCEPTION unnecessarily We only need to use __MASKABLE_EXCEPTION in one of the four cases for hardware interrupt, so use the helper macros in the other cases. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f8bf573cdf80..f87d02ec75dd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -768,13 +768,9 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) .globl hardware_interrupt_hv; hardware_interrupt_hv: BEGIN_FTR_SECTION - __MASKABLE_EXCEPTION(0x500, hardware_interrupt_common, - EXC_HV, SOFTEN_TEST_HV, - IRQS_DISABLED) + MASKABLE_EXCEPTION_HV(0x500, hardware_interrupt_common, IRQS_DISABLED) FTR_SECTION_ELSE - __MASKABLE_EXCEPTION(0x500, hardware_interrupt_common, - EXC_STD, SOFTEN_TEST_PR, - IRQS_DISABLED) + MASKABLE_EXCEPTION(0x500, hardware_interrupt_common, IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) EXC_REAL_END(hardware_interrupt, 0x500, 0x100) @@ -782,13 +778,11 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION - __MASKABLE_RELON_EXCEPTION(0x500, hardware_interrupt_common, - EXC_HV, SOFTEN_TEST_HV, - IRQS_DISABLED) + MASKABLE_RELON_EXCEPTION_HV(0x500, hardware_interrupt_common, + IRQS_DISABLED) FTR_SECTION_ELSE __MASKABLE_RELON_EXCEPTION(0x500, hardware_interrupt_common, - EXC_STD, SOFTEN_TEST_PR, - IRQS_DISABLED) + EXC_STD, SOFTEN_TEST_PR, IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) -- cgit From cf175dc315f90185128fb061dc05b6fbb211aa2f Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Sat, 28 Jul 2018 09:06:32 +1000 Subject: powerpc/64: Disable the speculation barrier from the command line The speculation barrier can be disabled from the command line with the parameter: "nospectre_v1". Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/security.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 4cb8f1f7b593..79f9397998ed 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -16,6 +16,7 @@ unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; bool barrier_nospec_enabled; +static bool no_nospec; static void enable_barrier_nospec(bool enable) { @@ -42,9 +43,18 @@ void setup_barrier_nospec(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); - enable_barrier_nospec(enable); + if (!no_nospec) + enable_barrier_nospec(enable); } +static int __init handle_nospectre_v1(char *p) +{ + no_nospec = true; + + return 0; +} +early_param("nospectre_v1", handle_nospectre_v1); + #ifdef CONFIG_DEBUG_FS static int barrier_nospec_set(void *data, u64 val) { -- cgit From 6453b532f2c8856a80381e6b9a1f5ea2f12294df Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Sat, 28 Jul 2018 09:06:33 +1000 Subject: powerpc/64: Make stf barrier PPC_BOOK3S_64 specific. NXP Book3E platforms are not vulnerable to speculative store bypass, so make the mitigations PPC_BOOK3S_64 specific. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/security.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 79f9397998ed..8ee1ade845c6 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -176,6 +176,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c return s.len; } +#ifdef CONFIG_PPC_BOOK3S_64 /* * Store-forwarding barrier support. */ @@ -323,3 +324,4 @@ static __init int stf_barrier_debugfs_init(void) } device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit From 179ab1cbf883575c3a585bcfc0f2160f1d22a149 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sat, 28 Jul 2018 09:06:34 +1000 Subject: powerpc/64: Add CONFIG_PPC_BARRIER_NOSPEC Add a config symbol to encode which platforms support the barrier_nospec speculation barrier. Currently this is just Book3S 64 but we will add Book3E in a future patch. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 7 ++++++- arch/powerpc/include/asm/barrier.h | 6 +++--- arch/powerpc/include/asm/setup.h | 2 +- arch/powerpc/kernel/Makefile | 3 ++- arch/powerpc/kernel/module.c | 4 +++- arch/powerpc/kernel/vmlinux.lds.S | 4 +++- arch/powerpc/lib/feature-fixups.c | 6 ++++-- 7 files changed, 22 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ee38fce075ee..846c89ed7fa3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -165,7 +165,7 @@ config PPC select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE - select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 + select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_SMP_IDLE_THREAD @@ -242,6 +242,11 @@ config PPC # Please keep this list sorted alphabetically. # +config PPC_BARRIER_NOSPEC + bool + default y + depends on PPC_BOOK3S_64 + config GENERIC_CSUM def_bool n diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index de1316874e45..cdc6960506e2 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -78,7 +78,7 @@ do { \ ___p1; \ }) -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_BARRIER_NOSPEC /* * Prevent execution of subsequent instructions until preceding branches have * been fully resolved and are no longer executing speculatively. @@ -88,10 +88,10 @@ do { \ // This also acts as a compiler barrier due to the memory clobber. #define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory") -#else /* !CONFIG_PPC_BOOK3S_64 */ +#else /* !CONFIG_PPC_BARRIER_NOSPEC */ #define barrier_nospec_asm #define barrier_nospec() -#endif +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ #include diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 8721fd004291..8205f9fdfd67 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -56,7 +56,7 @@ void setup_barrier_nospec(void); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_BARRIER_NOSPEC void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); #else static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2b4c40b255e4..dbe2cf04b406 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,9 +42,10 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o -obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 1b3c6835e730..77371c9ef3d8 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -72,13 +72,15 @@ int module_finalize(const Elf_Ehdr *hdr, do_feature_fixups(powerpc_firmware_features, (void *)sect->sh_addr, (void *)sect->sh_addr + sect->sh_size); +#endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_BARRIER_NOSPEC sect = find_section(hdr, sechdrs, "__spec_barrier_fixup"); if (sect != NULL) do_barrier_nospec_fixups_range(barrier_nospec_enabled, (void *)sect->sh_addr, (void *)sect->sh_addr + sect->sh_size); -#endif +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ sect = find_section(hdr, sechdrs, "__lwsync_fixup"); if (sect != NULL) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 5baac79df97e..07ae018e550e 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -153,14 +153,16 @@ SECTIONS *(__rfi_flush_fixup) __stop___rfi_flush_fixup = .; } +#endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_BARRIER_NOSPEC . = ALIGN(8); __spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) { __start___barrier_nospec_fixup = .; *(__barrier_nospec_fixup) __stop___barrier_nospec_fixup = .; } -#endif +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ EXCEPTION_TABLE(0) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 8b69f868298c..0e604b41b5d1 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -304,6 +304,9 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BARRIER_NOSPEC void do_barrier_nospec_fixups(bool enable) { void *start, *end; @@ -313,8 +316,7 @@ void do_barrier_nospec_fixups(bool enable) do_barrier_nospec_fixups_range(enable, start, end); } - -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_BARRIER_NOSPEC */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { -- cgit From af375eefbfb27cbb5b831984e66d724a40d26b5c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sat, 28 Jul 2018 09:06:35 +1000 Subject: powerpc/64: Call setup_barrier_nospec() from setup_arch() Currently we require platform code to call setup_barrier_nospec(). But if we add an empty definition for the !CONFIG_PPC_BARRIER_NOSPEC case then we can call it in setup_arch(). Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/setup.h | 4 ++++ arch/powerpc/kernel/setup-common.c | 2 ++ arch/powerpc/platforms/powernv/setup.c | 1 - arch/powerpc/platforms/pseries/setup.c | 1 - 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 8205f9fdfd67..1a951b00465d 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,7 +52,11 @@ enum l1d_flush_type { void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); +#ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); +#else +static inline void setup_barrier_nospec(void) { }; +#endif void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 40b44bb53a4e..93fa0c99681e 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -972,6 +972,8 @@ void __init setup_arch(char **cmdline_p) if (ppc_md.setup_arch) ppc_md.setup_arch(); + setup_barrier_nospec(); + paging_init(); /* Initialize the MMU context management stuff. */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 7b09f66ee8c6..3fc25304af38 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -124,7 +124,6 @@ static void pnv_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); - setup_barrier_nospec(); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b411a74b861d..08f0c5de5b09 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -551,7 +551,6 @@ void pseries_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); - setup_barrier_nospec(); } #ifdef CONFIG_PCI_IOV -- cgit From 406d2b6ae3420f5bb2b3db6986dc6f0b6dbb637b Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Sat, 28 Jul 2018 09:06:36 +1000 Subject: powerpc/64: Make meltdown reporting Book3S 64 specific In a subsequent patch we will enable building security.c for Book3E. However the NXP platforms are not vulnerable to Meltdown, so make the Meltdown vulnerability reporting PPC_BOOK3S_64 specific. Signed-off-by: Diana Craciun [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/security.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 8ee1ade845c6..206488603b66 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -92,6 +92,7 @@ static __init int barrier_nospec_debugfs_init(void) device_initcall(barrier_nospec_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +#ifdef CONFIG_PPC_BOOK3S_64 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { bool thread_priv; @@ -124,6 +125,7 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha return sprintf(buf, "Vulnerable\n"); } +#endif ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { -- cgit From ebcd1bfc33c7a90df941df68a6e5d4018c022fba Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Sat, 28 Jul 2018 09:06:37 +1000 Subject: powerpc/fsl: Add barrier_nospec implementation for NXP PowerPC Book3E Implement the barrier_nospec as a isync;sync instruction sequence. The implementation uses the infrastructure built for BOOK3S 64. Signed-off-by: Diana Craciun [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/barrier.h | 8 +++++++- arch/powerpc/lib/feature-fixups.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 846c89ed7fa3..8b6cd6780d8f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -245,7 +245,7 @@ config PPC config PPC_BARRIER_NOSPEC bool default y - depends on PPC_BOOK3S_64 + depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E config GENERIC_CSUM def_bool n diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index cdc6960506e2..fbe8df433019 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -78,12 +78,18 @@ do { \ ___p1; \ }) +#ifdef CONFIG_PPC_BOOK3S_64 +#define NOSPEC_BARRIER_SLOT nop +#elif defined(CONFIG_PPC_FSL_BOOK3E) +#define NOSPEC_BARRIER_SLOT nop; nop +#endif + #ifdef CONFIG_PPC_BARRIER_NOSPEC /* * Prevent execution of subsequent instructions until preceding branches have * been fully resolved and are no longer executing speculatively. */ -#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; nop +#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; NOSPEC_BARRIER_SLOT // This also acts as a compiler barrier due to the memory clobber. #define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory") diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 0e604b41b5d1..e613b02bb2f0 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -318,6 +318,37 @@ void do_barrier_nospec_fixups(bool enable) } #endif /* CONFIG_PPC_BARRIER_NOSPEC */ +#ifdef CONFIG_PPC_FSL_BOOK3E +void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) +{ + unsigned int instr[2], *dest; + long *start, *end; + int i; + + start = fixup_start; + end = fixup_end; + + instr[0] = PPC_INST_NOP; + instr[1] = PPC_INST_NOP; + + if (enable) { + pr_info("barrier-nospec: using isync; sync as speculation barrier\n"); + instr[0] = PPC_INST_ISYNC; + instr[1] = PPC_INST_SYNC; + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + patch_instruction(dest, instr[0]); + patch_instruction(dest + 1, instr[1]); + } + + printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_FSL_BOOK3E */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; -- cgit From c28218d4abbf4f2035495334d8bfcba64bda4787 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Sat, 28 Jul 2018 09:06:38 +1000 Subject: powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms Used barrier_nospec to sanitize the syscall table. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_32.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3bd097be90d9..e58c3f467db5 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -35,6 +35,7 @@ #include #include #include +#include /* * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. @@ -360,6 +361,15 @@ syscall_dotrace_cont: ori r10,r10,sys_call_table@l slwi r0,r0,2 bge- 66f + + barrier_nospec_asm + /* + * Prevent the load of the handler below (based on the user-passed + * system call number) being speculatively executed until the test + * against NR_syscalls and branch to .66f above has + * committed. + */ + lwzx r10,r10,r0 /* Fetch system call handler [ptr] */ mtlr r10 addi r9,r1,STACK_FRAME_OVERHEAD -- cgit From 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Sat, 28 Jul 2018 09:06:39 +1000 Subject: Documentation: Add nospectre_v1 parameter Currently only supported on powerpc. Signed-off-by: Diana Craciun Signed-off-by: Michael Ellerman --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index efc7aa7a0670..4167bbea51e1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2687,6 +2687,10 @@ nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. + nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds + check bypass). With this option data leaks are possible + in the system. + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 (indirect branch prediction) vulnerability. System may allow data leaks with this option, which is equivalent -- cgit From 06d0bbc6d0f56dacac3a79900e9a9a0d5972d818 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Jul 2018 01:07:52 +1000 Subject: powerpc/asm: Add a patch_site macro & helpers for patching instructions Add a macro and some helper C functions for patching single asm instructions. The gas macro means we can do something like: 1: nop patch_site 1b, patch__foo Which is less visually distracting than defining a GLOBAL symbol at 1, and also doesn't pollute the symbol table which can confuse eg. perf. These are obviously similar to our existing feature sections, but are not automatically patched based on CPU/MMU features, rather they are designed to be manually patched by C code at some arbitrary point. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/code-patching-asm.h | 18 ++++++++++++++++++ arch/powerpc/include/asm/code-patching.h | 2 ++ arch/powerpc/lib/code-patching.c | 16 ++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 arch/powerpc/include/asm/code-patching-asm.h diff --git a/arch/powerpc/include/asm/code-patching-asm.h b/arch/powerpc/include/asm/code-patching-asm.h new file mode 100644 index 000000000000..ed7b1448493a --- /dev/null +++ b/arch/powerpc/include/asm/code-patching-asm.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2018, Michael Ellerman, IBM Corporation. + */ +#ifndef _ASM_POWERPC_CODE_PATCHING_ASM_H +#define _ASM_POWERPC_CODE_PATCHING_ASM_H + +/* Define a "site" that can be patched */ +.macro patch_site label name + .pushsection ".rodata" + .balign 4 + .global \name +\name: + .4byte \label - . + .popsection +.endm + +#endif /* _ASM_POWERPC_CODE_PATCHING_ASM_H */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 9ecc7bfc8ae7..31733a95bbd0 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -33,6 +33,8 @@ unsigned int create_cond_branch(const unsigned int *addr, int patch_branch(unsigned int *addr, unsigned long target, int flags); int patch_instruction(unsigned int *addr, unsigned int instr); int raw_patch_instruction(unsigned int *addr, unsigned int instr); +int patch_instruction_site(s32 *addr, unsigned int instr); +int patch_branch_site(s32 *site, unsigned long target, int flags); int instr_is_relative_branch(unsigned int instr); int instr_is_relative_link_branch(unsigned int instr); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index e0d881ab304e..850f3b8f4da5 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -195,6 +195,22 @@ int patch_branch(unsigned int *addr, unsigned long target, int flags) return patch_instruction(addr, create_branch(addr, target, flags)); } +int patch_branch_site(s32 *site, unsigned long target, int flags) +{ + unsigned int *addr; + + addr = (unsigned int *)((unsigned long)site + *site); + return patch_instruction(addr, create_branch(addr, target, flags)); +} + +int patch_instruction_site(s32 *site, unsigned int instr) +{ + unsigned int *addr; + + addr = (unsigned int *)((unsigned long)site + *site); + return patch_instruction(addr, instr); +} + bool is_offset_in_branch_range(long offset) { /* -- cgit From dc8c6cce9a26a51fc19961accb978217a3ba8c75 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Jul 2018 01:07:53 +1000 Subject: powerpc/64s: Add new security feature flags for count cache flush Add security feature flags to indicate the need for software to flush the count cache on context switch, and for the presence of a hardware assisted count cache flush. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/security_features.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 44989b22383c..a0d47bc18a5c 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -59,6 +59,9 @@ static inline bool security_ftr_enabled(unsigned long feature) // Indirect branch prediction cache disabled #define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull +// bcctr 2,0,0 triggers a hardware assisted count cache flush +#define SEC_FTR_BCCTR_FLUSH_ASSIST 0x0000000000000800ull + // Features indicating need for Spectre/Meltdown mitigations @@ -74,6 +77,9 @@ static inline bool security_ftr_enabled(unsigned long feature) // Firmware configuration indicates user favours security over performance #define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull +// Software required to flush count cache on context switch +#define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull + // Features enabled by default #define SEC_FTR_DEFAULT \ -- cgit From ee13cb249fabdff8b90aaff61add347749280087 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Jul 2018 01:07:54 +1000 Subject: powerpc/64s: Add support for software count cache flush Some CPU revisions support a mode where the count cache needs to be flushed by software on context switch. Additionally some revisions may have a hardware accelerated flush, in which case the software flush sequence can be shortened. If we detect the appropriate flag from firmware we patch a branch into _switch() which takes us to a count cache flush sequence. That sequence in turn may be patched to return early if we detect that the CPU supports accelerating the flush sequence in hardware. Add debugfs support for reporting the state of the flush, as well as runtime disabling it. And modify the spectre_v2 sysfs file to report the state of the software flush. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 6 ++ arch/powerpc/include/asm/security_features.h | 1 + arch/powerpc/kernel/entry_64.S | 54 +++++++++++++++ arch/powerpc/kernel/security.c | 98 ++++++++++++++++++++++++++-- 4 files changed, 154 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 769567b66c0c..70fdc5b9b9fb 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -143,4 +143,10 @@ struct kvm_vcpu; void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); +/* Patch sites */ +extern s32 patch__call_flush_count_cache; +extern s32 patch__flush_count_cache_return; + +extern long flush_count_cache; + #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index a0d47bc18a5c..759597bf0fd8 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -22,6 +22,7 @@ enum stf_barrier_type { void setup_stf_barrier(void); void do_stf_barrier_fixups(enum stf_barrier_type types); +void setup_count_cache_flush(void); static inline void security_ftr_set(unsigned long feature) { diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index a9e74ecdab0b..2206912ea4f0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -506,6 +507,57 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b .Lsyscall_exit +#ifdef CONFIG_PPC_BOOK3S_64 + +#define FLUSH_COUNT_CACHE \ +1: nop; \ + patch_site 1b, patch__call_flush_count_cache + + +#define BCCTR_FLUSH .long 0x4c400420 + +.macro nops number + .rept \number + nop + .endr +.endm + +.balign 32 +.global flush_count_cache +flush_count_cache: + /* Save LR into r9 */ + mflr r9 + + .rept 64 + bl .+4 + .endr + b 1f + nops 6 + + .balign 32 + /* Restore LR */ +1: mtlr r9 + li r9,0x7fff + mtctr r9 + + BCCTR_FLUSH + +2: nop + patch_site 2b patch__flush_count_cache_return + + nops 3 + + .rept 278 + .balign 32 + BCCTR_FLUSH + nops 7 + .endr + + blr +#else +#define FLUSH_COUNT_CACHE +#endif /* CONFIG_PPC_BOOK3S_64 */ + /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -537,6 +589,8 @@ _GLOBAL(_switch) std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ + FLUSH_COUNT_CACHE + /* * On SMP kernels, care must be taken because a task may be * scheduled off CPUx and on to CPUy. Memory ordering must be diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 206488603b66..f6f469fc4073 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -8,6 +8,8 @@ #include #include +#include +#include #include #include #include @@ -15,6 +17,13 @@ unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +enum count_cache_flush_type { + COUNT_CACHE_FLUSH_NONE = 0x1, + COUNT_CACHE_FLUSH_SW = 0x2, + COUNT_CACHE_FLUSH_HW = 0x4, +}; +static enum count_cache_flush_type count_cache_flush_type; + bool barrier_nospec_enabled; static bool no_nospec; @@ -159,17 +168,29 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); - if (bcs || ccd) { + if (bcs || ccd || count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { + bool comma = false; seq_buf_printf(&s, "Mitigation: "); - if (bcs) + if (bcs) { seq_buf_printf(&s, "Indirect branch serialisation (kernel only)"); + comma = true; + } + + if (ccd) { + if (comma) + seq_buf_printf(&s, ", "); + seq_buf_printf(&s, "Indirect branch cache disabled"); + comma = true; + } - if (bcs && ccd) + if (comma) seq_buf_printf(&s, ", "); - if (ccd) - seq_buf_printf(&s, "Indirect branch cache disabled"); + seq_buf_printf(&s, "Software count cache flush"); + + if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) + seq_buf_printf(&s, "(hardware accelerated)"); } else seq_buf_printf(&s, "Vulnerable"); @@ -326,4 +347,71 @@ static __init int stf_barrier_debugfs_init(void) } device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ + +static void toggle_count_cache_flush(bool enable) +{ + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); + count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; + pr_info("count-cache-flush: software flush disabled.\n"); + return; + } + + patch_branch_site(&patch__call_flush_count_cache, + (u64)&flush_count_cache, BRANCH_SET_LINK); + + if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { + count_cache_flush_type = COUNT_CACHE_FLUSH_SW; + pr_info("count-cache-flush: full software flush sequence enabled.\n"); + return; + } + + patch_instruction_site(&patch__flush_count_cache_return, PPC_INST_BLR); + count_cache_flush_type = COUNT_CACHE_FLUSH_HW; + pr_info("count-cache-flush: hardware assisted flush sequence enabled\n"); +} + +void setup_count_cache_flush(void) +{ + toggle_count_cache_flush(true); +} + +#ifdef CONFIG_DEBUG_FS +static int count_cache_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + toggle_count_cache_flush(enable); + + return 0; +} + +static int count_cache_flush_get(void *data, u64 *val) +{ + if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE) + *val = 0; + else + *val = 1; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, + count_cache_flush_set, "%llu\n"); + +static __init int count_cache_flush_debugfs_init(void) +{ + debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root, + NULL, &fops_count_cache_flush); + return 0; +} +device_initcall(count_cache_flush_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit From ba72dc171954b782a79d25e0f4b3ed91090c3b1e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Jul 2018 01:07:55 +1000 Subject: powerpc/pseries: Query hypervisor for count cache flush settings Use the existing hypercall to determine the appropriate settings for the count cache flush, and then call the generic powerpc code to set it up based on the security feature flags. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 662c8347d699..a0b17f9f1ea4 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -342,10 +342,12 @@ #define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5 #define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6 #define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7 +#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) // IBM bit 9 #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 +#define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 08f0c5de5b09..395cfe320141 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -501,6 +501,12 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED) security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) + security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + + if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE) + security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. @@ -551,6 +557,7 @@ void pseries_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); + setup_count_cache_flush(); } #ifdef CONFIG_PCI_IOV -- cgit From 99d54754d3d5f896a8f616b0b6520662bc99d66b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Jul 2018 01:07:56 +1000 Subject: powerpc/powernv: Query firmware for count cache flush settings Look for fw-features properties to determine the appropriate settings for the count cache flush, and then call the generic powerpc code to set it up based on the security feature flags. Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 3fc25304af38..adddde023622 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -78,6 +78,12 @@ static void init_fw_feat_flags(struct device_node *np) if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np)) + security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + + if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np)) + security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. @@ -124,6 +130,7 @@ static void pnv_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); + setup_count_cache_flush(); } static void __init pnv_setup_arch(void) -- cgit From 78ee9946371f5848ddfc88ab1a43867df8f17d83 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jul 2018 22:42:44 +1000 Subject: powerpc/64s: Make rfi_flush_fallback a little more robust Because rfi_flush_fallback runs immediately before the return to userspace it currently runs with the user r1 (stack pointer). This means if we oops in there we will report a bad kernel stack pointer in the exception entry path, eg: Bad kernel stack pointer 7ffff7150e40 at c0000000000023b4 Oops: Bad kernel stack pointer, sig: 6 [#1] LE SMP NR_CPUS=32 NUMA PowerNV Modules linked in: CPU: 0 PID: 1246 Comm: klogd Not tainted 4.18.0-rc2-gcc-7.3.1-00175-g0443f8a69ba3 #7 NIP: c0000000000023b4 LR: 0000000010053e00 CTR: 0000000000000040 REGS: c0000000fffe7d40 TRAP: 4100 Not tainted (4.18.0-rc2-gcc-7.3.1-00175-g0443f8a69ba3) MSR: 9000000002803031 CR: 44000442 XER: 20000000 CFAR: c00000000000bac8 IRQMASK: c0000000f1e66a80 GPR00: 0000000002000000 00007ffff7150e40 00007fff93a99900 0000000000000020 ... NIP [c0000000000023b4] rfi_flush_fallback+0x34/0x80 LR [0000000010053e00] 0x10053e00 Although the NIP tells us where we were, and the TRAP number tells us what happened, it would still be nicer if we could report the actual exception rather than barfing about the stack pointer. We an do that fairly simply by loading the kernel stack pointer on entry and restoring the user value before returning. That way we see a regular oops such as: Unrecoverable exception 4100 at c00000000000239c Oops: Unrecoverable exception, sig: 6 [#1] LE SMP NR_CPUS=32 NUMA PowerNV Modules linked in: CPU: 0 PID: 1251 Comm: klogd Not tainted 4.18.0-rc3-gcc-7.3.1-00097-g4ebfcac65acd-dirty #40 NIP: c00000000000239c LR: 0000000010053e00 CTR: 0000000000000040 REGS: c0000000f1e17bb0 TRAP: 4100 Not tainted (4.18.0-rc3-gcc-7.3.1-00097-g4ebfcac65acd-dirty) MSR: 9000000002803031 CR: 44000442 XER: 20000000 CFAR: c00000000000bac8 IRQMASK: 0 ... NIP [c00000000000239c] rfi_flush_fallback+0x3c/0x80 LR [0000000010053e00] 0x10053e00 Call Trace: [c0000000f1e17e30] [c00000000000b9e4] system_call+0x5c/0x70 (unreliable) Note this shouldn't make the kernel stack pointer vulnerable to a meltdown attack, because it should be flushed from the cache before we return to userspace. The user r1 value will be in the cache, because we load it in the return path, but that is harmless. Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f87d02ec75dd..ea04dfb8c092 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1522,6 +1522,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); + std r1,PACA_EXRFI+EX_R12(r13) + ld r1,PACAKSAVE(r13) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) @@ -1556,12 +1558,15 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) + ld r1,PACA_EXRFI+EX_R12(r13) GET_SCRATCH0(r13); rfid TRAMP_REAL_BEGIN(hrfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); + std r1,PACA_EXRFI+EX_R12(r13) + ld r1,PACAKSAVE(r13) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) @@ -1596,6 +1601,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) + ld r1,PACA_EXRFI+EX_R12(r13) GET_SCRATCH0(r13); hrfid -- cgit From edba42cd14dbb0cc8b48cf786b006ac5da4cd9a7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 31 Jul 2018 22:08:41 +1000 Subject: selftests/powerpc: Skip earlier in alignment_handler test Currently the alignment_handler test prints "Can't open /dev/fb0" about 80 times per run, which is a little annoying. Refactor it to check earlier if it can open /dev/fb0 and skip if not, this results in each test printing something like: test: test_alignment_handler_vsx_206 tags: git_version:v4.18-rc3-134-gfb21a48904aa [SKIP] Test skipped on line 291 skip: test_alignment_handler_vsx_206 Signed-off-by: Michael Ellerman Acked-by: Andrew Donnellan --- .../powerpc/alignment/alignment_handler.c | 40 +++++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 0f2698f9fd6d..0eddd16af49f 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -191,7 +192,7 @@ int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name) */ int do_test(char *test_name, void (*test_func)(char *, char *)) { - int offset, width, fd, rc = 0, r; + int offset, width, fd, rc, r; void *mem0, *mem1, *ci0, *ci1; printf("\tDoing %s:\t", test_name); @@ -199,8 +200,8 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) fd = open("/dev/fb0", O_RDWR); if (fd < 0) { printf("\n"); - perror("Can't open /dev/fb0"); - SKIP_IF(1); + perror("Can't open /dev/fb0 now?"); + return 1; } ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, @@ -226,6 +227,7 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) return rc; } + rc = 0; /* offset = 0 no alignment fault, so skip */ for (offset = 1; offset < 16; offset++) { width = 16; /* vsx == 16 bytes */ @@ -244,32 +246,50 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) r |= test_memcpy(mem1, mem0, width, offset, test_func); if (r && !debug) { printf("FAILED: Got signal"); + rc = 1; break; } r |= test_memcmp(mem1, ci1, width, offset, test_name); - rc |= r; if (r && !debug) { printf("FAILED: Wrong Data"); + rc = 1; break; } } - if (!r) + + if (rc == 0) printf("PASSED"); + printf("\n"); munmap(ci0, bufsize); munmap(ci1, bufsize); free(mem0); free(mem1); + close(fd); return rc; } +static bool can_open_fb0(void) +{ + int fd; + + fd = open("/dev/fb0", O_RDWR); + if (fd < 0) + return false; + + close(fd); + return true; +} + int test_alignment_handler_vsx_206(void) { int rc = 0; + SKIP_IF(!can_open_fb0()); + printf("VSX: 2.06B\n"); LOAD_VSX_XFORM_TEST(lxvd2x); LOAD_VSX_XFORM_TEST(lxvw4x); @@ -285,6 +305,8 @@ int test_alignment_handler_vsx_207(void) { int rc = 0; + SKIP_IF(!can_open_fb0()); + printf("VSX: 2.07B\n"); LOAD_VSX_XFORM_TEST(lxsspx); LOAD_VSX_XFORM_TEST(lxsiwax); @@ -298,6 +320,8 @@ int test_alignment_handler_vsx_300(void) { int rc = 0; + SKIP_IF(!can_open_fb0()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); printf("VSX: 3.00B\n"); LOAD_VMX_DFORM_TEST(lxsd); @@ -328,6 +352,8 @@ int test_alignment_handler_integer(void) { int rc = 0; + SKIP_IF(!can_open_fb0()); + printf("Integer\n"); LOAD_DFORM_TEST(lbz); LOAD_DFORM_TEST(lbzu); @@ -383,6 +409,8 @@ int test_alignment_handler_vmx(void) { int rc = 0; + SKIP_IF(!can_open_fb0()); + printf("VMX\n"); LOAD_VMX_XFORM_TEST(lvx); @@ -408,6 +436,8 @@ int test_alignment_handler_fp(void) { int rc = 0; + SKIP_IF(!can_open_fb0()); + printf("Floating point\n"); LOAD_FLOAT_DFORM_TEST(lfd); LOAD_FLOAT_XFORM_TEST(lfdx); -- cgit From 8e4bdc699bf32c58bfff7b2bfac204a6bf5efb69 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 31 Jul 2018 22:08:42 +1000 Subject: selftests/powerpc: Add more version checks to alignment_handler test The alignment_handler is documented to only work on Power8/Power9, but we can make it run on older CPUs by guarding more of the tests with feature checks. Signed-off-by: Michael Ellerman Reviewed-by: Andrew Donnellan --- .../powerpc/alignment/alignment_handler.c | 67 +++++++++++++++++++--- 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 0eddd16af49f..169a8b9719fb 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -49,6 +49,8 @@ #include #include +#include + #include "utils.h" int bufsize; @@ -289,6 +291,7 @@ int test_alignment_handler_vsx_206(void) int rc = 0; SKIP_IF(!can_open_fb0()); + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("VSX: 2.06B\n"); LOAD_VSX_XFORM_TEST(lxvd2x); @@ -306,6 +309,7 @@ int test_alignment_handler_vsx_207(void) int rc = 0; SKIP_IF(!can_open_fb0()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); printf("VSX: 2.07B\n"); LOAD_VSX_XFORM_TEST(lxsspx); @@ -380,7 +384,6 @@ int test_alignment_handler_integer(void) LOAD_DFORM_TEST(ldu); LOAD_XFORM_TEST(ldx); LOAD_XFORM_TEST(ldux); - LOAD_XFORM_TEST(ldbrx); LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stb); STORE_XFORM_TEST(stbx); @@ -400,8 +403,23 @@ int test_alignment_handler_integer(void) STORE_XFORM_TEST(stdx); STORE_DFORM_TEST(stdu); STORE_XFORM_TEST(stdux); - STORE_XFORM_TEST(stdbrx); STORE_DFORM_TEST(stmw); + + return rc; +} + +int test_alignment_handler_integer_206(void) +{ + int rc = 0; + + SKIP_IF(!can_open_fb0()); + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + printf("Integer: 2.06\n"); + + LOAD_XFORM_TEST(ldbrx); + STORE_XFORM_TEST(stdbrx); + return rc; } @@ -410,6 +428,7 @@ int test_alignment_handler_vmx(void) int rc = 0; SKIP_IF(!can_open_fb0()); + SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)); printf("VMX\n"); LOAD_VMX_XFORM_TEST(lvx); @@ -441,20 +460,14 @@ int test_alignment_handler_fp(void) printf("Floating point\n"); LOAD_FLOAT_DFORM_TEST(lfd); LOAD_FLOAT_XFORM_TEST(lfdx); - LOAD_FLOAT_DFORM_TEST(lfdp); - LOAD_FLOAT_XFORM_TEST(lfdpx); LOAD_FLOAT_DFORM_TEST(lfdu); LOAD_FLOAT_XFORM_TEST(lfdux); LOAD_FLOAT_DFORM_TEST(lfs); LOAD_FLOAT_XFORM_TEST(lfsx); LOAD_FLOAT_DFORM_TEST(lfsu); LOAD_FLOAT_XFORM_TEST(lfsux); - LOAD_FLOAT_XFORM_TEST(lfiwzx); - LOAD_FLOAT_XFORM_TEST(lfiwax); STORE_FLOAT_DFORM_TEST(stfd); STORE_FLOAT_XFORM_TEST(stfdx); - STORE_FLOAT_DFORM_TEST(stfdp); - STORE_FLOAT_XFORM_TEST(stfdpx); STORE_FLOAT_DFORM_TEST(stfdu); STORE_FLOAT_XFORM_TEST(stfdux); STORE_FLOAT_DFORM_TEST(stfs); @@ -466,6 +479,38 @@ int test_alignment_handler_fp(void) return rc; } +int test_alignment_handler_fp_205(void) +{ + int rc = 0; + + SKIP_IF(!can_open_fb0()); + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05)); + + printf("Floating point: 2.05\n"); + + LOAD_FLOAT_DFORM_TEST(lfdp); + LOAD_FLOAT_XFORM_TEST(lfdpx); + LOAD_FLOAT_XFORM_TEST(lfiwax); + STORE_FLOAT_DFORM_TEST(stfdp); + STORE_FLOAT_XFORM_TEST(stfdpx); + + return rc; +} + +int test_alignment_handler_fp_206(void) +{ + int rc = 0; + + SKIP_IF(!can_open_fb0()); + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + printf("Floating point: 2.06\n"); + + LOAD_FLOAT_XFORM_TEST(lfiwzx); + + return rc; +} + void usage(char *prog) { printf("Usage: %s [options]\n", prog); @@ -513,9 +558,15 @@ int main(int argc, char *argv[]) "test_alignment_handler_vsx_300"); rc |= test_harness(test_alignment_handler_integer, "test_alignment_handler_integer"); + rc |= test_harness(test_alignment_handler_integer_206, + "test_alignment_handler_integer_206"); rc |= test_harness(test_alignment_handler_vmx, "test_alignment_handler_vmx"); rc |= test_harness(test_alignment_handler_fp, "test_alignment_handler_fp"); + rc |= test_harness(test_alignment_handler_fp_205, + "test_alignment_handler_fp_205"); + rc |= test_harness(test_alignment_handler_fp_206, + "test_alignment_handler_fp_206"); return rc; } -- cgit From 658b0f92bc7003bc734471f61bf7cd56339eb8c3 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Wed, 1 Aug 2018 18:33:15 -0300 Subject: powerpc/traps: Print unhandled signals in a separate function Isolate the logic of printing unhandled signals out of _exception_pkey(). No functional change, only code rearrangement. Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0e17dcb48720..cbd3dc365193 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -301,26 +301,32 @@ void user_single_step_siginfo(struct task_struct *tsk, info->si_addr = (void __user *)regs->nip; } +static void show_signal_msg(int signr, struct pt_regs *regs, int code, + unsigned long addr) +{ + const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \ + "at %08lx nip %08lx lr %08lx code %x\n"; + const char fmt64[] = KERN_INFO "%s[%d]: unhandled signal %d " \ + "at %016lx nip %016lx lr %016lx code %x\n"; + + if (show_unhandled_signals && unhandled_signal(current, signr)) { + printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, + current->comm, current->pid, signr, + addr, regs->nip, regs->link, code); + } +} void _exception_pkey(int signr, struct pt_regs *regs, int code, - unsigned long addr, int key) + unsigned long addr, int key) { siginfo_t info; - const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \ - "at %08lx nip %08lx lr %08lx code %x\n"; - const char fmt64[] = KERN_INFO "%s[%d]: unhandled signal %d " \ - "at %016lx nip %016lx lr %016lx code %x\n"; if (!user_mode(regs)) { die("Exception in kernel mode", regs, signr); return; } - if (show_unhandled_signals && unhandled_signal(current, signr)) { - printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, signr, - addr, regs->nip, regs->link, code); - } + show_signal_msg(signr, regs, code, addr); if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs)) local_irq_enable(); -- cgit From 35a52a10c3ac5b425ff85c12913458b4741ab866 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Wed, 1 Aug 2018 18:33:16 -0300 Subject: powerpc/traps: Use an explicit ratelimit state for show_signal_msg() Replace printk_ratelimited() by printk() and a default rate limit burst to limit displaying unhandled signals messages. This will allow us to call print_vma_addr() in a future patch, which does not work with printk_ratelimited(). Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index cbd3dc365193..47c6620d1609 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -301,6 +301,13 @@ void user_single_step_siginfo(struct task_struct *tsk, info->si_addr = (void __user *)regs->nip; } +static bool show_unhandled_signals_ratelimited(void) +{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + return show_unhandled_signals && __ratelimit(&rs); +} + static void show_signal_msg(int signr, struct pt_regs *regs, int code, unsigned long addr) { @@ -309,11 +316,15 @@ static void show_signal_msg(int signr, struct pt_regs *regs, int code, const char fmt64[] = KERN_INFO "%s[%d]: unhandled signal %d " \ "at %016lx nip %016lx lr %016lx code %x\n"; - if (show_unhandled_signals && unhandled_signal(current, signr)) { - printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, signr, - addr, regs->nip, regs->link, code); - } + if (!show_unhandled_signals_ratelimited()) + return; + + if (!unhandled_signal(current, signr)) + return; + + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, + current->comm, current->pid, signr, + addr, regs->nip, regs->link, code); } void _exception_pkey(int signr, struct pt_regs *regs, int code, -- cgit From 49d8f2011dddac642a20491a698ad07059ed6096 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Wed, 1 Aug 2018 18:33:17 -0300 Subject: powerpc/traps: Use %lx format in show_signal_msg() Use %lx format to print registers. This avoids having two different formats and avoids checking for MSR_64BIT, improving readability of the function. Even though we could have used %px, which is functionally equivalent to %lx as per Documentation/core-api/printk-formats.rst, it is not semantically correct because the data printed are not pointers. And using %px requires casting data to (void *). Besides that, %lx matches the format used in show_regs(). Before this patch: pandafault[4808]: unhandled signal 11 at 0000000010000718 nip 0000000010000574 lr 00007fff935e7a6c code 2 After this patch: pandafault[4732]: unhandled signal 11 at 10000718 nip 10000574 lr 7fff86697a6c code 2 Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 47c6620d1609..eb83cf6b5cb2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -311,20 +311,15 @@ static bool show_unhandled_signals_ratelimited(void) static void show_signal_msg(int signr, struct pt_regs *regs, int code, unsigned long addr) { - const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \ - "at %08lx nip %08lx lr %08lx code %x\n"; - const char fmt64[] = KERN_INFO "%s[%d]: unhandled signal %d " \ - "at %016lx nip %016lx lr %016lx code %x\n"; - if (!show_unhandled_signals_ratelimited()) return; if (!unhandled_signal(current, signr)) return; - printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, signr, - addr, regs->nip, regs->link, code); + pr_info("%s[%d]: unhandled signal %d at %lx nip %lx lr %lx code %x\n", + current->comm, current->pid, signr, + addr, regs->nip, regs->link, code); } void _exception_pkey(int signr, struct pt_regs *regs, int code, -- cgit From 0f642d616b8b15647ca34786877b298ff2970713 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Wed, 1 Aug 2018 18:33:18 -0300 Subject: powerpc/traps: Print VMA for unhandled signals This adds VMA address in the message printed for unhandled signals, similarly to what other architectures, like x86, print. Before this patch, a page fault looked like: pandafault[61470]: unhandled signal 11 at 100007d0 nip 1000061c lr 7fff8d185100 code 2 After this patch, a page fault looks like: pandafault[6303]: segfault 11 at 100007d0 nip 1000061c lr 7fff93c55100 code 2 in pandafault[10000000+10000] Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index eb83cf6b5cb2..171c504d78fb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -96,6 +96,19 @@ EXPORT_SYMBOL(__debugger_fault_handler); #define TM_DEBUG(x...) do { } while(0) #endif +static const char *signame(int signr) +{ + switch (signr) { + case SIGBUS: return "bus error"; + case SIGFPE: return "floating point exception"; + case SIGILL: return "illegal instruction"; + case SIGSEGV: return "segfault"; + case SIGTRAP: return "unhandled trap"; + } + + return "unknown signal"; +} + /* * Trap & Exception support */ @@ -317,9 +330,13 @@ static void show_signal_msg(int signr, struct pt_regs *regs, int code, if (!unhandled_signal(current, signr)) return; - pr_info("%s[%d]: unhandled signal %d at %lx nip %lx lr %lx code %x\n", - current->comm, current->pid, signr, + pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x", + current->comm, current->pid, signame(signr), signr, addr, regs->nip, regs->link, code); + + print_vma_addr(KERN_CONT " in ", regs->nip); + + pr_cont("\n"); } void _exception_pkey(int signr, struct pt_regs *regs, int code, -- cgit From 88b0fe17573592a8e3196bf143f865da460178e7 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Wed, 1 Aug 2018 18:33:19 -0300 Subject: powerpc: Add show_user_instructions() show_user_instructions() is a slightly modified version of show_instructions() that allows userspace instruction dump. This will be useful within show_signal_msg() to dump userspace instructions of the faulty location. Here is a sample of what show_user_instructions() outputs: pandafault[10850]: code: 4bfffeec 4bfffee8 3c401002 38427f00 fbe1fff8 f821ffc1 7c3f0b78 3d22fffe pandafault[10850]: code: 392988d0 f93f0020 e93f0020 39400048 <99490000> 39200000 7d234b78 383f0040 The current->comm and current->pid printed can serve as a glue that links the instructions dump to its originator, allowing messages to be interleaved in the logs. Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/stacktrace.h | 13 +++++++++++++ arch/powerpc/kernel/process.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 arch/powerpc/include/asm/stacktrace.h diff --git a/arch/powerpc/include/asm/stacktrace.h b/arch/powerpc/include/asm/stacktrace.h new file mode 100644 index 000000000000..6149b53b3bc8 --- /dev/null +++ b/arch/powerpc/include/asm/stacktrace.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Stack trace functions. + * + * Copyright 2018, Murilo Opsfelder Araujo, IBM Corporation. + */ + +#ifndef _ASM_POWERPC_STACKTRACE_H +#define _ASM_POWERPC_STACKTRACE_H + +void show_user_instructions(struct pt_regs *regs); + +#endif /* _ASM_POWERPC_STACKTRACE_H */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2172f9908633..913c5725cdb2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1299,6 +1299,38 @@ static void show_instructions(struct pt_regs *regs) pr_cont("\n"); } +void show_user_instructions(struct pt_regs *regs) +{ + unsigned long pc; + int i; + + pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int)); + + pr_info("%s[%d]: code: ", current->comm, current->pid); + + for (i = 0; i < instructions_to_print; i++) { + int instr; + + if (!(i % 8) && (i > 0)) { + pr_cont("\n"); + pr_info("%s[%d]: code: ", current->comm, current->pid); + } + + if (probe_kernel_address((unsigned int __user *)pc, instr)) { + pr_cont("XXXXXXXX "); + } else { + if (regs->nip == pc) + pr_cont("<%08x> ", instr); + else + pr_cont("%08x ", instr); + } + + pc += sizeof(int); + } + + pr_cont("\n"); +} + struct regbit { unsigned long bit; const char *name; -- cgit From a99b9c5ed43ecfff5c47c558cc0fcbf8c6503f4c Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Wed, 1 Aug 2018 18:33:20 -0300 Subject: powerpc/traps: Show instructions on exceptions Call show_user_instructions() in arch/powerpc/kernel/traps.c to dump instructions at faulty location, useful to debugging. Before this patch, an unhandled signal message looked like: pandafault[10524]: segfault (11) at 100007d0 nip 1000061c lr 7fffbd295100 code 2 in pandafault[10000000+10000] After this patch, it looks like: pandafault[10524]: segfault (11) at 100007d0 nip 1000061c lr 7fffbd295100 code 2 in pandafault[10000000+10000] pandafault[10524]: code: 4bfffeec 4bfffee8 3c401002 38427f00 fbe1fff8 f821ffc1 7c3f0b78 3d22fffe pandafault[10524]: code: 392988d0 f93f0020 e93f0020 39400048 <99490000> 39200000 7d234b78 383f0040 Signed-off-by: Murilo Opsfelder Araujo Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 171c504d78fb..070e96f1773a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -70,6 +70,7 @@ #include #include #include +#include #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -337,6 +338,8 @@ static void show_signal_msg(int signr, struct pt_regs *regs, int code, print_vma_addr(KERN_CONT " in ", regs->nip); pr_cont("\n"); + + show_user_instructions(regs); } void _exception_pkey(int signr, struct pt_regs *regs, int code, -- cgit From e13606d7321c0c08d4ac2d74a1102680a40cfdee Mon Sep 17 00:00:00 2001 From: Darren Stevens Date: Fri, 3 Aug 2018 21:15:10 +1000 Subject: powerpc/pasemi: Use pr_err/pr_warn... for kernel messages Pasemi code still uses printk(KERN_ERR/KERN_WARN ... change these to pr_err(, pr_warn(... to match other powerpc arch code. No functional changes. Signed-off-by: Darren Stevens [mpe: Unsplit some strings while we're at it] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pasemi/dma_lib.c | 6 ++-- arch/powerpc/platforms/pasemi/gpio_mdio.c | 2 +- arch/powerpc/platforms/pasemi/idle.c | 4 +-- arch/powerpc/platforms/pasemi/iommu.c | 2 +- arch/powerpc/platforms/pasemi/misc.c | 4 +-- arch/powerpc/platforms/pasemi/pci.c | 5 ++- arch/powerpc/platforms/pasemi/setup.c | 53 +++++++++++++++---------------- 7 files changed, 36 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index 2c72263ad6ab..c80f72c370ae 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -531,7 +531,7 @@ int pasemi_dma_init(void) iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL); if (!iob_pdev) { BUG(); - printk(KERN_WARNING "Can't find I/O Bridge\n"); + pr_warn("Can't find I/O Bridge\n"); err = -ENODEV; goto out; } @@ -540,7 +540,7 @@ int pasemi_dma_init(void) dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); if (!dma_pdev) { BUG(); - printk(KERN_WARNING "Can't find DMA controller\n"); + pr_warn("Can't find DMA controller\n"); err = -ENODEV; goto out; } @@ -623,7 +623,7 @@ int pasemi_dma_init(void) pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 0xffffffff); pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 0xffffffff); - printk(KERN_INFO "PA Semi PWRficient DMA library initialized " + pr_info("PA Semi PWRficient DMA library initialized " "(%d tx, %d rx channels)\n", num_txch, num_rxch); out: diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index c23e60959aa8..6f35a2afe522 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -256,7 +256,7 @@ static int gpio_mdio_probe(struct platform_device *ofdev) err = of_mdiobus_register(new_bus, np); if (err != 0) { - printk(KERN_ERR "%s: Cannot register as MDIO bus, err %d\n", + pr_err("%s: Cannot register as MDIO bus, err %d\n", new_bus->name, err); goto out_free_irq; } diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 44e0d9226f0a..8bb4e8082441 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -78,13 +78,13 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) static int __init pasemi_idle_init(void) { #ifndef CONFIG_PPC_PASEMI_CPUFREQ - printk(KERN_WARNING "No cpufreq driver, powersavings modes disabled\n"); + pr_warn("No cpufreq driver, powersavings modes disabled\n"); current_mode = 0; #endif ppc_md.system_reset_exception = pasemi_system_reset_exception; ppc_md.power_save = modes[current_mode].entry; - printk(KERN_INFO "Using PA6T idle loop (%s)\n", modes[current_mode].name); + pr_info("Using PA6T idle loop (%s)\n", modes[current_mode].name); return 0; } diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 78b80cbd9768..f06c83f321e6 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -210,7 +210,7 @@ static int __init iob_init(struct device_node *dn) /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); - printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base); + pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base); /* Allocate a spare page to map all invalid IOTLB pages. */ tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c index 8571e7bf78b6..aa958a46957f 100644 --- a/arch/powerpc/platforms/pasemi/misc.c +++ b/arch/powerpc/platforms/pasemi/misc.c @@ -69,9 +69,7 @@ static int __init pasemi_register_i2c_devices(void) addr = of_get_property(node, "reg", &len); if (!addr || len < sizeof(int) || *addr > (1 << 10) - 1) { - printk(KERN_WARNING - "pasemi_register_i2c_devices: " - "invalid i2c device entry\n"); + pr_warn("pasemi_register_i2c_devices: invalid i2c device entry\n"); continue; } diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index 3e3e807d5b6b..c3c64172482d 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -205,7 +205,7 @@ static int __init pas_add_bridge(struct device_node *dev) setup_pa_pxp(hose); - printk(KERN_INFO "Found PA-PXP PCI host bridge.\n"); + pr_info("Found PA-PXP PCI host bridge.\n"); /* Interpret the "ranges" property */ pci_process_bridge_OF_ranges(hose, dev, 1); @@ -220,8 +220,7 @@ void __init pas_pci_init(void) root = of_find_node_by_path("/"); if (!root) { - printk(KERN_CRIT "pas_pci_init: can't find root " - "of device tree\n"); + pr_crit("pas_pci_init: can't find root of device tree\n"); return; } diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index d0b8ae53660d..9a6eb04cca83 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -207,8 +207,7 @@ static __init void pas_init_IRQ(void) break; } if (!mpic_node) { - printk(KERN_ERR - "Failed to locate the MPIC interrupt controller\n"); + pr_err("Failed to locate the MPIC interrupt controller\n"); return; } @@ -217,12 +216,12 @@ static __init void pas_init_IRQ(void) naddr = of_n_addr_cells(root); opprop = of_get_property(root, "platform-open-pic", &opplen); if (!opprop) { - printk(KERN_ERR "No platform-open-pic property.\n"); + pr_err("No platform-open-pic property.\n"); of_node_put(root); return; } openpic_addr = of_read_number(opprop, naddr); - printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); + pr_debug("OpenPIC addr: %lx\n", openpic_addr); mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS | MPIC_NO_RESET; @@ -265,72 +264,72 @@ static int pas_machine_check_handler(struct pt_regs *regs) srr1 = regs->msr; if (nmi_virq && mpic_get_mcirq() == nmi_virq) { - printk(KERN_ERR "NMI delivered\n"); + pr_err("NMI delivered\n"); debugger(regs); mpic_end_irq(irq_get_irq_data(nmi_virq)); goto out; } dsisr = mfspr(SPRN_DSISR); - printk(KERN_ERR "Machine Check on CPU %d\n", cpu); - printk(KERN_ERR "SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1); - printk(KERN_ERR "DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar); - printk(KERN_ERR "BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER), + pr_err("Machine Check on CPU %d\n", cpu); + pr_err("SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1); + pr_err("DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar); + pr_err("BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER), mfspr(SPRN_PA6T_MER)); - printk(KERN_ERR "IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER), + pr_err("IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER), mfspr(SPRN_PA6T_DER)); - printk(KERN_ERR "Cause:\n"); + pr_err("Cause:\n"); if (srr1 & 0x200000) - printk(KERN_ERR "Signalled by SDC\n"); + pr_err("Signalled by SDC\n"); if (srr1 & 0x100000) { - printk(KERN_ERR "Load/Store detected error:\n"); + pr_err("Load/Store detected error:\n"); if (dsisr & 0x8000) - printk(KERN_ERR "D-cache ECC double-bit error or bus error\n"); + pr_err("D-cache ECC double-bit error or bus error\n"); if (dsisr & 0x4000) - printk(KERN_ERR "LSU snoop response error\n"); + pr_err("LSU snoop response error\n"); if (dsisr & 0x2000) { - printk(KERN_ERR "MMU SLB multi-hit or invalid B field\n"); + pr_err("MMU SLB multi-hit or invalid B field\n"); dump_slb = 1; } if (dsisr & 0x1000) - printk(KERN_ERR "Recoverable Duptags\n"); + pr_err("Recoverable Duptags\n"); if (dsisr & 0x800) - printk(KERN_ERR "Recoverable D-cache parity error count overflow\n"); + pr_err("Recoverable D-cache parity error count overflow\n"); if (dsisr & 0x400) - printk(KERN_ERR "TLB parity error count overflow\n"); + pr_err("TLB parity error count overflow\n"); } if (srr1 & 0x80000) - printk(KERN_ERR "Bus Error\n"); + pr_err("Bus Error\n"); if (srr1 & 0x40000) { - printk(KERN_ERR "I-side SLB multiple hit\n"); + pr_err("I-side SLB multiple hit\n"); dump_slb = 1; } if (srr1 & 0x20000) - printk(KERN_ERR "I-cache parity error hit\n"); + pr_err("I-cache parity error hit\n"); if (num_mce_regs == 0) - printk(KERN_ERR "No MCE registers mapped yet, can't dump\n"); + pr_err("No MCE registers mapped yet, can't dump\n"); else - printk(KERN_ERR "SoC debug registers:\n"); + pr_err("SoC debug registers:\n"); for (i = 0; i < num_mce_regs; i++) - printk(KERN_ERR "%s: 0x%08x\n", mce_regs[i].name, + pr_err("%s: 0x%08x\n", mce_regs[i].name, in_le32(mce_regs[i].addr)); if (dump_slb) { unsigned long e, v; int i; - printk(KERN_ERR "slb contents:\n"); + pr_err("slb contents:\n"); for (i = 0; i < mmu_slb_size; i++) { asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i)); asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i)); - printk(KERN_ERR "%02d %016lx %016lx\n", i, e, v); + pr_err("%02d %016lx %016lx\n", i, e, v); } } -- cgit From a0ac3687fb404d59296ecba4642071424d153569 Mon Sep 17 00:00:00 2001 From: Parth Y Shah Date: Fri, 3 Aug 2018 15:50:38 +0530 Subject: misc: cxl: changed asterisk position Resolved <"foo* bar" should be "foo *bar"> error Signed-off-by: Parth Y Shah Signed-off-by: Michael Ellerman --- drivers/misc/cxl/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 70dbb6de102c..d45f3e6b17d2 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -33,7 +33,7 @@ static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb) * This finds a free SSTE for the given SLB, or returns NULL if it's already in * the segment table. */ -static struct cxl_sste* find_free_sste(struct cxl_context *ctx, +static struct cxl_sste *find_free_sste(struct cxl_context *ctx, struct copro_slb *slb) { struct cxl_sste *primary, *sste, *ret = NULL; -- cgit From 4231aba000f5a4583dd9f67057aadb68c3eca99d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 27 Jul 2018 21:48:17 +1000 Subject: powerpc/64s: Fix page table fragment refcount race vs speculative references The page table fragment allocator uses the main page refcount racily with respect to speculative references. A customer observed a BUG due to page table page refcount underflow in the fragment allocator. This can be caused by the fragment allocator set_page_count stomping on a speculative reference, and then the speculative failure handler decrements the new reference, and the underflow eventually pops when the page tables are freed. Fix this by using a dedicated field in the struct page for the page table fragment allocator. Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage") Cc: stable@vger.kernel.org # v3.10+ Reviewed-by: Aneesh Kumar K.V Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_book3s64.c | 8 ++++---- arch/powerpc/mm/pgtable-book3s64.c | 17 +++++++++++------ include/linux/mm_types.h | 5 ++++- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 8b24168ea8c4..4a892d894a0f 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -200,9 +200,9 @@ static void pte_frag_destroy(void *pte_frag) /* drop all the pending references */ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; /* We allow PTE_FRAG_NR fragments from a PTE page */ - if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) { + if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) { pgtable_page_dtor(page); - free_unref_page(page); + __free_page(page); } } @@ -215,9 +215,9 @@ static void pmd_frag_destroy(void *pmd_frag) /* drop all the pending references */ count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT; /* We allow PTE_FRAG_NR fragments from a PTE page */ - if (page_ref_sub_and_test(page, PMD_FRAG_NR - count)) { + if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) { pgtable_pmd_page_dtor(page); - free_unref_page(page); + __free_page(page); } } diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 4afbfbb64bfd..78d0b3d5ebad 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -270,6 +270,8 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) return NULL; } + atomic_set(&page->pt_frag_refcount, 1); + ret = page_address(page); /* * if we support only one fragment just return the @@ -285,7 +287,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) * count. */ if (likely(!mm->context.pmd_frag)) { - set_page_count(page, PMD_FRAG_NR); + atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR); mm->context.pmd_frag = ret + PMD_FRAG_SIZE; } spin_unlock(&mm->page_table_lock); @@ -308,9 +310,10 @@ void pmd_fragment_free(unsigned long *pmd) { struct page *page = virt_to_page(pmd); - if (put_page_testzero(page)) { + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); + if (atomic_dec_and_test(&page->pt_frag_refcount)) { pgtable_pmd_page_dtor(page); - free_unref_page(page); + __free_page(page); } } @@ -352,6 +355,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel) return NULL; } + atomic_set(&page->pt_frag_refcount, 1); ret = page_address(page); /* @@ -367,7 +371,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel) * count. */ if (likely(!mm->context.pte_frag)) { - set_page_count(page, PTE_FRAG_NR); + atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR); mm->context.pte_frag = ret + PTE_FRAG_SIZE; } spin_unlock(&mm->page_table_lock); @@ -390,10 +394,11 @@ void pte_fragment_free(unsigned long *table, int kernel) { struct page *page = virt_to_page(table); - if (put_page_testzero(page)) { + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); + if (atomic_dec_and_test(&page->pt_frag_refcount)) { if (!kernel) pgtable_page_dtor(page); - free_unref_page(page); + __free_page(page); } } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 99ce070e7dcb..22651e124071 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -139,7 +139,10 @@ struct page { unsigned long _pt_pad_1; /* compound_head */ pgtable_t pmd_huge_pte; /* protected by page->ptl */ unsigned long _pt_pad_2; /* mapping */ - struct mm_struct *pt_mm; /* x86 pgds only */ + union { + struct mm_struct *pt_mm; /* x86 pgds only */ + atomic_t pt_frag_refcount; /* powerpc */ + }; #if ALLOC_SPLIT_PTLOCKS spinlock_t *ptl; #else -- cgit From 7ccc4fe5ff9e3a134e863beed0dba18a5e511659 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Fri, 18 May 2018 13:05:25 +0530 Subject: powerpc/perf: Remove sched_task function defined for thread-imc Call trace observed while running perf-fuzzer: CPU: 43 PID: 9088 Comm: perf_fuzzer Not tainted 4.13.0-32-generic #35~lp1746225 task: c000003f776ac900 task.stack: c000003f77728000 NIP: c000000000299b70 LR: c0000000002a4534 CTR: c00000000029bb80 REGS: c000003f7772b760 TRAP: 0700 Not tainted (4.13.0-32-generic) MSR: 900000000282b033 CR: 24008822 XER: 00000000 CFAR: c000000000299a70 SOFTE: 0 GPR00: c0000000002a4534 c000003f7772b9e0 c000000001606200 c000003fef858908 GPR04: c000003f776ac900 0000000000000001 ffffffffffffffff 0000003fee730000 GPR08: 0000000000000000 0000000000000000 c0000000011220d8 0000000000000002 GPR12: c00000000029bb80 c000000007a3d900 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 c000003f776ad090 c000000000c71354 GPR24: c000003fef716780 0000003fee730000 c000003fe69d4200 c000003f776ad330 GPR28: c0000000011220d8 0000000000000001 c0000000014c6108 c000003fef858900 NIP [c000000000299b70] perf_pmu_sched_task+0x170/0x180 LR [c0000000002a4534] __perf_event_task_sched_in+0xc4/0x230 Call Trace: perf_iterate_sb+0x158/0x2a0 (unreliable) __perf_event_task_sched_in+0xc4/0x230 finish_task_switch+0x21c/0x310 __schedule+0x304/0xb80 schedule+0x40/0xc0 do_wait+0x254/0x2e0 kernel_wait4+0xa0/0x1a0 SyS_wait4+0x64/0xc0 system_call+0x58/0x6c Instruction dump: 3beafea0 7faa4800 409eff18 e8010060 eb610028 ebc10040 7c0803a6 38210050 eb81ffe0 eba1ffe8 ebe1fff8 4e800020 <0fe00000> 4bffffbc 60000000 60420000 ---[ end trace 8c46856d314c1811 ]--- The context switch call-backs for thread-imc are defined in sched_task function. So when thread-imc events are grouped with software pmu events, perf_pmu_sched_task hits the WARN_ON_ONCE condition, since software PMUs are assumed not to have a sched_task defined. Patch to move the thread_imc enable/disable opal call back from sched_task to event_[add/del] function Fixes: f74c89bd80fb ("powerpc/perf: Add thread IMC PMU support") Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Tested-by: Joel Stanley Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 108 +++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index d1977b61f827..1fafc32b12a0 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -867,59 +867,6 @@ static int thread_imc_cpu_init(void) ppc_thread_imc_cpu_offline); } -void thread_imc_pmu_sched_task(struct perf_event_context *ctx, - bool sched_in) -{ - int core_id; - struct imc_pmu_ref *ref; - - if (!is_core_imc_mem_inited(smp_processor_id())) - return; - - core_id = smp_processor_id() / threads_per_core; - /* - * imc pmus are enabled only when it is used. - * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the counters. - * If not, just increment the count in ref count struct. - */ - ref = &core_imc_refc[core_id]; - if (!ref) - return; - - if (sched_in) { - mutex_lock(&ref->lock); - if (ref->refc == 0) { - if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, - get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); - pr_err("thread-imc: Unable to start the counter\ - for core %d\n", core_id); - return; - } - } - ++ref->refc; - mutex_unlock(&ref->lock); - } else { - mutex_lock(&ref->lock); - ref->refc--; - if (ref->refc == 0) { - if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, - get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); - pr_err("thread-imc: Unable to stop the counters\ - for core %d\n", core_id); - return; - } - } else if (ref->refc < 0) { - ref->refc = 0; - } - mutex_unlock(&ref->lock); - } - - return; -} - static int thread_imc_event_init(struct perf_event *event) { u32 config = event->attr.config; @@ -1046,22 +993,70 @@ static int imc_event_add(struct perf_event *event, int flags) static int thread_imc_event_add(struct perf_event *event, int flags) { + int core_id; + struct imc_pmu_ref *ref; + if (flags & PERF_EF_START) imc_event_start(event, flags); - /* Enable the sched_task to start the engine */ - perf_sched_cb_inc(event->ctx->pmu); + if (!is_core_imc_mem_inited(smp_processor_id())) + return -EINVAL; + + core_id = smp_processor_id() / threads_per_core; + /* + * imc pmus are enabled only when it is used. + * See if this is triggered for the first time. + * If yes, take the mutex lock and enable the counters. + * If not, just increment the count in ref count struct. + */ + ref = &core_imc_refc[core_id]; + if (!ref) + return -EINVAL; + + mutex_lock(&ref->lock); + if (ref->refc == 0) { + if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(smp_processor_id()))) { + mutex_unlock(&ref->lock); + pr_err("thread-imc: Unable to start the counter\ + for core %d\n", core_id); + return -EINVAL; + } + } + ++ref->refc; + mutex_unlock(&ref->lock); return 0; } static void thread_imc_event_del(struct perf_event *event, int flags) { + + int core_id; + struct imc_pmu_ref *ref; + /* * Take a snapshot and calculate the delta and update * the event counter values. */ imc_event_update(event); - perf_sched_cb_dec(event->ctx->pmu); + + core_id = smp_processor_id() / threads_per_core; + ref = &core_imc_refc[core_id]; + + mutex_lock(&ref->lock); + ref->refc--; + if (ref->refc == 0) { + if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(smp_processor_id()))) { + mutex_unlock(&ref->lock); + pr_err("thread-imc: Unable to stop the counters\ + for core %d\n", core_id); + return; + } + } else if (ref->refc < 0) { + ref->refc = 0; + } + mutex_unlock(&ref->lock); } /* update_pmu_ops : Populate the appropriate operations for "pmu" */ @@ -1087,7 +1082,6 @@ static int update_pmu_ops(struct imc_pmu *pmu) break; case IMC_DOMAIN_THREAD: pmu->pmu.event_init = thread_imc_event_init; - pmu->pmu.sched_task = thread_imc_pmu_sched_task; pmu->pmu.add = thread_imc_event_add; pmu->pmu.del = thread_imc_event_del; pmu->pmu.start_txn = thread_imc_pmu_start_txn; -- cgit From f5daf77a55ef0e695cc90c440ed6503073ac5e07 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 15 Jul 2018 10:34:46 -0700 Subject: powerpc/platforms/85xx: fix t1042rdb_diu.c build errors & warning Fix build errors and warnings in t1042rdb_diu.c by adding header files and MODULE_LICENSE(). ../arch/powerpc/platforms/85xx/t1042rdb_diu.c:152:1: warning: data definition has no type or storage class early_initcall(t1042rdb_diu_init); ../arch/powerpc/platforms/85xx/t1042rdb_diu.c:152:1: error: type defaults to 'int' in declaration of 'early_initcall' [-Werror=implicit-int] ../arch/powerpc/platforms/85xx/t1042rdb_diu.c:152:1: warning: parameter names (without types) in function declaration and WARNING: modpost: missing MODULE_LICENSE() in arch/powerpc/platforms/85xx/t1042rdb_diu.o Signed-off-by: Randy Dunlap Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Scott Wood Cc: Kumar Gala Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/85xx/t1042rdb_diu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c index 58fa3d319f1c..dac36ba82fea 100644 --- a/arch/powerpc/platforms/85xx/t1042rdb_diu.c +++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c @@ -9,8 +9,10 @@ * option) any later version. */ +#include #include #include +#include #include #include @@ -150,3 +152,5 @@ static int __init t1042rdb_diu_init(void) } early_initcall(t1042rdb_diu_init); + +MODULE_LICENSE("GPL"); -- cgit From 6e708000ec2c93c2bde6a46aa2d6c3e80d4eaeb9 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Wed, 13 Jun 2018 00:28:57 -0700 Subject: powerpc/powernv: Export opal_check_token symbol Export opal_check_token symbol for modules to check the availability of OPAL calls before using them. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 6405b6f20193..46f58ff80bcf 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -977,6 +977,7 @@ EXPORT_SYMBOL_GPL(opal_flash_read); EXPORT_SYMBOL_GPL(opal_flash_write); EXPORT_SYMBOL_GPL(opal_flash_erase); EXPORT_SYMBOL_GPL(opal_prd_msg); +EXPORT_SYMBOL_GPL(opal_check_token); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, -- cgit From 656ecc16e8fc2ab44b3d70e3fcc197a7020d0ca5 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Wed, 13 Jun 2018 00:32:40 -0700 Subject: crypto/nx: Initialize 842 high and normal RxFIFO control registers NX increments readOffset by FIFO size in receive FIFO control register when CRB is read. But the index in RxFIFO has to match with the corresponding entry in FIFO maintained by VAS in kernel. Otherwise NX may be processing incorrect CRBs and can cause CRB timeout. VAS FIFO offset is 0 when the receive window is opened during initialization. When the module is reloaded or in kexec boot, readOffset in FIFO control register may not match with VAS entry. This patch adds nx_coproc_init OPAL call to reset readOffset and queued entries in FIFO control register for both high and normal FIFOs. Signed-off-by: Haren Myneni [mpe: Fixup uninitialized variable warning] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 3 ++- arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/opal.c | 2 ++ drivers/crypto/nx/nx-842-powernv.c | 31 +++++++++++++++++++++++--- 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 56a94a1bd754..8365353330b4 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -209,7 +209,8 @@ #define OPAL_SENSOR_GROUP_ENABLE 163 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 -#define OPAL_LAST 165 +#define OPAL_NX_COPROC_INIT 167 +#define OPAL_LAST 167 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 2e81555de643..834e7e29f1e4 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -293,6 +293,7 @@ int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr); int opal_set_power_shift_ratio(u32 handle, int token, u32 psr); int opal_sensor_group_clear(u32 group_hndl, int token); int opal_sensor_group_enable(u32 group_hndl, int token, bool enable); +int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct); s64 opal_signal_system_reset(s32 cpu); s64 opal_quiesce(u64 shutdown_type, s32 cpu); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 029b37c04f35..251528231a9e 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -330,3 +330,4 @@ OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR); OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64); OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE); +OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 46f58ff80bcf..404c379db168 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -1090,3 +1090,5 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async); EXPORT_SYMBOL_GPL(opal_int_set_mfrr); EXPORT_SYMBOL_GPL(opal_int_eoi); EXPORT_SYMBOL_GPL(opal_error_code); +/* Export the below symbol for NX compression */ +EXPORT_SYMBOL(opal_nx_coproc_init); diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c index 36afd6d8753c..c68df7e8bee1 100644 --- a/drivers/crypto/nx/nx-842-powernv.c +++ b/drivers/crypto/nx/nx-842-powernv.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dan Streetman "); @@ -753,7 +755,7 @@ static int nx842_open_percpu_txwins(void) } static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, - int vasid) + int vasid, int *ct) { struct vas_window *rxwin = NULL; struct vas_rx_win_attr rxattr; @@ -837,6 +839,15 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, coproc->vas.id = vasid; nx842_add_coprocs_list(coproc, chip_id); + /* + * (lpid, pid, tid) combination has to be unique for each + * coprocessor instance in the system. So to make it + * unique, skiboot uses coprocessor type such as 842 or + * GZIP for pid and provides this value to kernel in pid + * device-tree property. + */ + *ct = pid; + return 0; err_out: @@ -850,6 +861,7 @@ static int __init nx842_powernv_probe_vas(struct device_node *pn) struct device_node *dn; int chip_id, vasid, ret = 0; int nx_fifo_found = 0; + int uninitialized_var(ct); chip_id = of_get_ibm_chip_id(pn); if (chip_id < 0) { @@ -865,7 +877,7 @@ static int __init nx842_powernv_probe_vas(struct device_node *pn) for_each_child_of_node(pn, dn) { if (of_device_is_compatible(dn, "ibm,p9-nx-842")) { - ret = vas_cfg_coproc_info(dn, chip_id, vasid); + ret = vas_cfg_coproc_info(dn, chip_id, vasid, &ct); if (ret) { of_node_put(dn); return ret; @@ -876,9 +888,22 @@ static int __init nx842_powernv_probe_vas(struct device_node *pn) if (!nx_fifo_found) { pr_err("NX842 FIFO nodes are missing\n"); - ret = -EINVAL; + return -EINVAL; } + /* + * Initialize NX instance for both high and normal priority FIFOs. + */ + if (opal_check_token(OPAL_NX_COPROC_INIT)) { + ret = opal_nx_coproc_init(chip_id, ct); + if (ret) { + pr_err("Failed to initialize NX for chip(%d): %d\n", + chip_id, ret); + ret = opal_error_code(ret); + } + } else + pr_warn("Firmware doesn't support NX initialization\n"); + return ret; } -- cgit From 81d7b08b3cec79a43411e7175401b369090dba29 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Sat, 4 Aug 2018 22:25:00 +0800 Subject: powerpc/powermac: of_node_put() is not needed after iterator for_each_node_by_name() iterators only exit normally when the loop cursor is NULL, So there is no need to call of_node_put(). Signed-off-by: zhong jiang Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powermac/feature.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 3f82cb24eb2b..4eb8cb38fc69 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2889,10 +2889,8 @@ set_initial_features(void) /* On all machines, switch modem & serial ports off */ for_each_node_by_name(np, "ch-a") initial_serial_shutdown(np); - of_node_put(np); for_each_node_by_name(np, "ch-b") initial_serial_shutdown(np); - of_node_put(np); } void __init -- cgit From a7c81ce398e2ad304f61d6167155f3ef65a96524 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Aug 2018 20:13:03 +1000 Subject: powerpc/64: Make exception table clearer in __copy_tofrom_user_base This aims to make the generation of exception table entries for the loads and stores in __copy_tofrom_user_base clearer and easier to verify. Instead of having a series of local labels on the loads and stores, with a series of corresponding labels later for the exception handlers, we now use macros to generate exception table entries at the point of each load and store that could potentially trap. We do this with the macros lex (load exception) and stex (store exception). These macros are used right before the load or store to which they apply. Some complexity is introduced by the fact that we have some more work to do after hitting an exception, because we need to calculate and return the number of bytes not copied. The code uses r3 as the current pointer into the destination buffer, that is, the address of the first byte of the destination that has not been modified. However, at various points in the copy loops, r3 can be 4, 8, 16 or 24 bytes behind that point. To express this offset in an understandable way, we define a symbol r3_offset which is updated at various points so that it equal to the difference between the address of the first unmodified byte of the destination and the value in r3. (In fact it only needs to be accurate at the point of each lex or stex macro invocation.) The rules for updating r3_offset are as follows: * It starts out at 0 * An addi r3,r3,N instruction decreases r3_offset by N * A store instruction (stb, sth, stw, std) to N(r3) increases r3_offset by the width of the store (1, 2, 4, 8) * A store with update instruction (stbu, sthu, stwu, stdu) to N(r3) sets r3_offset to the width of the store. There is some trickiness to the way that the lex and stex macros and the associated exception handlers work. I would have liked to use the current value of r3_offset in the name of the symbol used as the exception handler, as in ".Lld_exc_$(r3_offset)" and then have symbols .Lld_exc_0, .Lld_exc_8, .Lld_exc_16 etc. corresponding to the offsets that needed to be added to r3. However, I couldn't see a way to do that with gas. Instead, the exception handler address is .Lld_exc - r3_offset or .Lst_exc - r3_offset, that is, the distance ahead of .Lld_exc/.Lst_exc that we start executing is equal to the amount that we need to add to r3. This works because r3_offset is always a small multiple of 4, and our instructions are 4 bytes long. This means that before .Lld_exc and .Lst_exc, we have a sequence of instructions that increments r3 by 4, 8, 16 or 24 depending on where we start. The sequence increments r3 by 4 per instruction (on average). We also replace the exception table for the 4k copy loop by a macro per load or store. These loads and stores all use exactly the same exception handler, which simply resets the argument registers r3, r4 and r5 to there original values and re-does the whole copy using the slower loop. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/lib/copyuser_64.S | 551 +++++++++++++++++------------------------ 1 file changed, 225 insertions(+), 326 deletions(-) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 2d6f128d3ebe..8c5033c85e37 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -20,6 +20,28 @@ #define sHd sld /* Shift towards high-numbered address. */ #endif +/* + * These macros are used to generate exception table entries. + * The exception handlers below use the original arguments + * (stored on the stack) and the point where we're up to in + * the destination buffer, i.e. the address of the first + * unmodified byte. Generally r3 points into the destination + * buffer, but the first unmodified byte is at a variable + * offset from r3. In the code below, the symbol r3_offset + * is set to indicate the current offset at each point in + * the code. This offset is then used as a negative offset + * from the exception handler code, and those instructions + * before the exception handlers are addi instructions that + * adjust r3 to point to the correct place. + */ + .macro lex /* exception handler for load */ +100: EX_TABLE(100b, .Lld_exc - r3_offset) + .endm + + .macro stex /* exception handler for store */ +100: EX_TABLE(100b, .Lst_exc - r3_offset) + .endm + .align 7 _GLOBAL_TOC(__copy_tofrom_user) #ifdef CONFIG_PPC_BOOK3S_64 @@ -30,7 +52,7 @@ FTR_SECTION_ELSE ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) #endif _GLOBAL(__copy_tofrom_user_base) - /* first check for a whole page copy on a page boundary */ + /* first check for a 4kB copy on a 4kB boundary */ cmpldi cr1,r5,16 cmpdi cr6,r5,4096 or r0,r3,r4 @@ -59,6 +81,7 @@ ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \ CPU_FTR_UNALIGNED_LD_STD) .Ldst_aligned: addi r3,r3,-16 +r3_offset = 16 BEGIN_FTR_SECTION andi. r0,r4,7 bne .Lsrc_unaligned @@ -66,57 +89,69 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blt cr1,.Ldo_tail /* if < 16 bytes to copy */ srdi r0,r5,5 cmpdi cr1,r0,0 -20: ld r7,0(r4) -220: ld r6,8(r4) +lex; ld r7,0(r4) +lex; ld r6,8(r4) addi r4,r4,16 mtctr r0 andi. r0,r5,0x10 beq 22f addi r3,r3,16 +r3_offset = 0 addi r4,r4,-16 mr r9,r7 mr r8,r6 beq cr1,72f -21: ld r7,16(r4) -221: ld r6,24(r4) +21: +lex; ld r7,16(r4) +lex; ld r6,24(r4) addi r4,r4,32 -70: std r9,0(r3) -270: std r8,8(r3) -22: ld r9,0(r4) -222: ld r8,8(r4) -71: std r7,16(r3) -271: std r6,24(r3) +stex; std r9,0(r3) +r3_offset = 8 +stex; std r8,8(r3) +r3_offset = 16 +22: +lex; ld r9,0(r4) +lex; ld r8,8(r4) +stex; std r7,16(r3) +r3_offset = 24 +stex; std r6,24(r3) addi r3,r3,32 +r3_offset = 0 bdnz 21b -72: std r9,0(r3) -272: std r8,8(r3) +72: +stex; std r9,0(r3) +r3_offset = 8 +stex; std r8,8(r3) +r3_offset = 16 andi. r5,r5,0xf beq+ 3f addi r4,r4,16 .Ldo_tail: addi r3,r3,16 +r3_offset = 0 bf cr7*4+0,246f -244: ld r9,0(r4) +lex; ld r9,0(r4) addi r4,r4,8 -245: std r9,0(r3) +stex; std r9,0(r3) addi r3,r3,8 246: bf cr7*4+1,1f -23: lwz r9,0(r4) +lex; lwz r9,0(r4) addi r4,r4,4 -73: stw r9,0(r3) +stex; stw r9,0(r3) addi r3,r3,4 1: bf cr7*4+2,2f -44: lhz r9,0(r4) +lex; lhz r9,0(r4) addi r4,r4,2 -74: sth r9,0(r3) +stex; sth r9,0(r3) addi r3,r3,2 2: bf cr7*4+3,3f -45: lbz r9,0(r4) -75: stb r9,0(r3) +lex; lbz r9,0(r4) +stex; stb r9,0(r3) 3: li r3,0 blr .Lsrc_unaligned: +r3_offset = 16 srdi r6,r5,3 addi r5,r5,-16 subf r4,r0,r4 @@ -129,58 +164,69 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) add r5,r5,r0 bt cr7*4+0,28f -24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ -25: ld r0,8(r4) +lex; ld r9,0(r4) /* 3+2n loads, 2+2n stores */ +lex; ld r0,8(r4) sLd r6,r9,r10 -26: ldu r9,16(r4) +lex; ldu r9,16(r4) sHd r7,r0,r11 sLd r8,r0,r10 or r7,r7,r6 blt cr6,79f -27: ld r0,8(r4) +lex; ld r0,8(r4) b 2f -28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ -29: ldu r9,8(r4) +28: +lex; ld r0,0(r4) /* 4+2n loads, 3+2n stores */ +lex; ldu r9,8(r4) sLd r8,r0,r10 addi r3,r3,-8 +r3_offset = 24 blt cr6,5f -30: ld r0,8(r4) +lex; ld r0,8(r4) sHd r12,r9,r11 sLd r6,r9,r10 -31: ldu r9,16(r4) +lex; ldu r9,16(r4) or r12,r8,r12 sHd r7,r0,r11 sLd r8,r0,r10 addi r3,r3,16 +r3_offset = 8 beq cr6,78f 1: or r7,r7,r6 -32: ld r0,8(r4) -76: std r12,8(r3) +lex; ld r0,8(r4) +stex; std r12,8(r3) +r3_offset = 16 2: sHd r12,r9,r11 sLd r6,r9,r10 -33: ldu r9,16(r4) +lex; ldu r9,16(r4) or r12,r8,r12 -77: stdu r7,16(r3) +stex; stdu r7,16(r3) +r3_offset = 8 sHd r7,r0,r11 sLd r8,r0,r10 bdnz 1b -78: std r12,8(r3) +78: +stex; std r12,8(r3) +r3_offset = 16 or r7,r7,r6 -79: std r7,16(r3) +79: +stex; std r7,16(r3) +r3_offset = 24 5: sHd r12,r9,r11 or r12,r8,r12 -80: std r12,24(r3) +stex; std r12,24(r3) +r3_offset = 32 bne 6f li r3,0 blr 6: cmpwi cr1,r5,8 addi r3,r3,32 +r3_offset = 0 sLd r9,r9,r10 ble cr1,7f -34: ld r0,8(r4) +lex; ld r0,8(r4) sHd r7,r0,r11 or r9,r7,r9 7: @@ -188,7 +234,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) #ifdef __BIG_ENDIAN__ rotldi r9,r9,32 #endif -94: stw r9,0(r3) +stex; stw r9,0(r3) #ifdef __LITTLE_ENDIAN__ rotrdi r9,r9,32 #endif @@ -197,7 +243,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) #ifdef __BIG_ENDIAN__ rotldi r9,r9,16 #endif -95: sth r9,0(r3) +stex; sth r9,0(r3) #ifdef __LITTLE_ENDIAN__ rotrdi r9,r9,16 #endif @@ -206,7 +252,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) #ifdef __BIG_ENDIAN__ rotldi r9,r9,8 #endif -96: stb r9,0(r3) +stex; stb r9,0(r3) #ifdef __LITTLE_ENDIAN__ rotrdi r9,r9,8 #endif @@ -214,47 +260,55 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blr .Ldst_unaligned: +r3_offset = 0 PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */ subf r5,r6,r5 li r7,0 cmpldi cr1,r5,16 bf cr7*4+3,1f -35: lbz r0,0(r4) -81: stb r0,0(r3) +100: EX_TABLE(100b, .Lld_exc_r7) + lbz r0,0(r4) +100: EX_TABLE(100b, .Lst_exc_r7) + stb r0,0(r3) addi r7,r7,1 1: bf cr7*4+2,2f -36: lhzx r0,r7,r4 -82: sthx r0,r7,r3 +100: EX_TABLE(100b, .Lld_exc_r7) + lhzx r0,r7,r4 +100: EX_TABLE(100b, .Lst_exc_r7) + sthx r0,r7,r3 addi r7,r7,2 2: bf cr7*4+1,3f -37: lwzx r0,r7,r4 -83: stwx r0,r7,r3 +100: EX_TABLE(100b, .Lld_exc_r7) + lwzx r0,r7,r4 +100: EX_TABLE(100b, .Lst_exc_r7) + stwx r0,r7,r3 3: PPC_MTOCRF(0x01,r5) add r4,r6,r4 add r3,r6,r3 b .Ldst_aligned .Lshort_copy: +r3_offset = 0 bf cr7*4+0,1f -38: lwz r0,0(r4) -39: lwz r9,4(r4) +lex; lwz r0,0(r4) +lex; lwz r9,4(r4) addi r4,r4,8 -84: stw r0,0(r3) -85: stw r9,4(r3) +stex; stw r0,0(r3) +stex; stw r9,4(r3) addi r3,r3,8 1: bf cr7*4+1,2f -40: lwz r0,0(r4) +lex; lwz r0,0(r4) addi r4,r4,4 -86: stw r0,0(r3) +stex; stw r0,0(r3) addi r3,r3,4 2: bf cr7*4+2,3f -41: lhz r0,0(r4) +lex; lhz r0,0(r4) addi r4,r4,2 -87: sth r0,0(r3) +stex; sth r0,0(r3) addi r3,r3,2 3: bf cr7*4+3,4f -42: lbz r0,0(r4) -88: stb r0,0(r3) +lex; lbz r0,0(r4) +stex; stb r0,0(r3) 4: li r3,0 blr @@ -262,48 +316,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) * exception handlers follow * we have to return the number of bytes not copied * for an exception on a load, we set the rest of the destination to 0 + * Note that the number of bytes of instructions for adjusting r3 needs + * to equal the amount of the adjustment, due to the trick of using + * .Lld_exc - r3_offset as the handler address. */ -136: -137: +.Lld_exc_r7: add r3,r3,r7 - b 1f -130: -131: + b .Lld_exc + + /* adjust by 24 */ addi r3,r3,8 -120: -320: -122: -322: -124: -125: -126: -127: -128: -129: -133: + nop + /* adjust by 16 */ addi r3,r3,8 -132: + nop + /* adjust by 8 */ addi r3,r3,8 -121: -321: -344: -134: -135: -138: -139: -140: -141: -142: -123: -144: -145: + nop /* - * here we have had a fault on a load and r3 points to the first - * unmodified byte of the destination + * Here we have had a fault on a load and r3 points to the first + * unmodified byte of the destination. We use the original arguments + * and r3 to work out how much wasn't copied. Since we load some + * distance ahead of the stores, we continue copying byte-by-byte until + * we hit the load fault again in order to copy as much as possible. */ -1: ld r6,-24(r1) +.Lld_exc: + ld r6,-24(r1) ld r4,-16(r1) ld r5,-8(r1) subf r6,r6,r3 @@ -314,9 +354,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) * first see if we can copy any more bytes before hitting another exception */ mtctr r5 +r3_offset = 0 +100: EX_TABLE(100b, .Ldone) 43: lbz r0,0(r4) addi r4,r4,1 -89: stb r0,0(r3) +stex; stb r0,0(r3) addi r3,r3,1 bdnz 43b li r3,0 /* huh? all copied successfully this time? */ @@ -325,116 +367,46 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) /* * here we have trapped again, amount remaining is in ctr. */ -143: mfctr r3 +.Ldone: + mfctr r3 blr /* * exception handlers for stores: we just need to work * out how many bytes weren't copied + * Note that the number of bytes of instructions for adjusting r3 needs + * to equal the amount of the adjustment, due to the trick of using + * .Lst_exc - r3_offset as the handler address. */ -182: -183: +.Lst_exc_r7: add r3,r3,r7 - b 1f -371: -180: + b .Lst_exc + + /* adjust by 24 */ addi r3,r3,8 -171: -177: -179: + nop + /* adjust by 16 */ addi r3,r3,8 -370: -372: -176: -178: + nop + /* adjust by 8 */ addi r3,r3,4 -185: + /* adjust by 4 */ addi r3,r3,4 -170: -172: -345: -173: -174: -175: -181: -184: -186: -187: -188: -189: -194: -195: -196: -1: +.Lst_exc: ld r6,-24(r1) ld r5,-8(r1) add r6,r6,r5 - subf r3,r3,r6 /* #bytes not copied */ + subf r3,r3,r6 /* #bytes not copied in r3 */ blr - EX_TABLE(20b,120b) - EX_TABLE(220b,320b) - EX_TABLE(21b,121b) - EX_TABLE(221b,321b) - EX_TABLE(70b,170b) - EX_TABLE(270b,370b) - EX_TABLE(22b,122b) - EX_TABLE(222b,322b) - EX_TABLE(71b,171b) - EX_TABLE(271b,371b) - EX_TABLE(72b,172b) - EX_TABLE(272b,372b) - EX_TABLE(244b,344b) - EX_TABLE(245b,345b) - EX_TABLE(23b,123b) - EX_TABLE(73b,173b) - EX_TABLE(44b,144b) - EX_TABLE(74b,174b) - EX_TABLE(45b,145b) - EX_TABLE(75b,175b) - EX_TABLE(24b,124b) - EX_TABLE(25b,125b) - EX_TABLE(26b,126b) - EX_TABLE(27b,127b) - EX_TABLE(28b,128b) - EX_TABLE(29b,129b) - EX_TABLE(30b,130b) - EX_TABLE(31b,131b) - EX_TABLE(32b,132b) - EX_TABLE(76b,176b) - EX_TABLE(33b,133b) - EX_TABLE(77b,177b) - EX_TABLE(78b,178b) - EX_TABLE(79b,179b) - EX_TABLE(80b,180b) - EX_TABLE(34b,134b) - EX_TABLE(94b,194b) - EX_TABLE(95b,195b) - EX_TABLE(96b,196b) - EX_TABLE(35b,135b) - EX_TABLE(81b,181b) - EX_TABLE(36b,136b) - EX_TABLE(82b,182b) - EX_TABLE(37b,137b) - EX_TABLE(83b,183b) - EX_TABLE(38b,138b) - EX_TABLE(39b,139b) - EX_TABLE(84b,184b) - EX_TABLE(85b,185b) - EX_TABLE(40b,140b) - EX_TABLE(86b,186b) - EX_TABLE(41b,141b) - EX_TABLE(87b,187b) - EX_TABLE(42b,142b) - EX_TABLE(88b,188b) - EX_TABLE(43b,143b) - EX_TABLE(89b,189b) - /* * Routine to copy a whole page of data, optimized for POWER4. * On POWER4 it is more than 50% faster than the simple loop * above (following the .Ldst_aligned label). */ + .macro exc +100: EX_TABLE(100b, .Labort) + .endm .Lcopy_page_4K: std r31,-32(1) std r30,-40(1) @@ -453,86 +425,86 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) li r0,5 0: addi r5,r5,-24 mtctr r0 -20: ld r22,640(4) -21: ld r21,512(4) -22: ld r20,384(4) -23: ld r11,256(4) -24: ld r9,128(4) -25: ld r7,0(4) -26: ld r25,648(4) -27: ld r24,520(4) -28: ld r23,392(4) -29: ld r10,264(4) -30: ld r8,136(4) -31: ldu r6,8(4) +exc; ld r22,640(4) +exc; ld r21,512(4) +exc; ld r20,384(4) +exc; ld r11,256(4) +exc; ld r9,128(4) +exc; ld r7,0(4) +exc; ld r25,648(4) +exc; ld r24,520(4) +exc; ld r23,392(4) +exc; ld r10,264(4) +exc; ld r8,136(4) +exc; ldu r6,8(4) cmpwi r5,24 1: -32: std r22,648(3) -33: std r21,520(3) -34: std r20,392(3) -35: std r11,264(3) -36: std r9,136(3) -37: std r7,8(3) -38: ld r28,648(4) -39: ld r27,520(4) -40: ld r26,392(4) -41: ld r31,264(4) -42: ld r30,136(4) -43: ld r29,8(4) -44: std r25,656(3) -45: std r24,528(3) -46: std r23,400(3) -47: std r10,272(3) -48: std r8,144(3) -49: std r6,16(3) -50: ld r22,656(4) -51: ld r21,528(4) -52: ld r20,400(4) -53: ld r11,272(4) -54: ld r9,144(4) -55: ld r7,16(4) -56: std r28,664(3) -57: std r27,536(3) -58: std r26,408(3) -59: std r31,280(3) -60: std r30,152(3) -61: stdu r29,24(3) -62: ld r25,664(4) -63: ld r24,536(4) -64: ld r23,408(4) -65: ld r10,280(4) -66: ld r8,152(4) -67: ldu r6,24(4) +exc; std r22,648(3) +exc; std r21,520(3) +exc; std r20,392(3) +exc; std r11,264(3) +exc; std r9,136(3) +exc; std r7,8(3) +exc; ld r28,648(4) +exc; ld r27,520(4) +exc; ld r26,392(4) +exc; ld r31,264(4) +exc; ld r30,136(4) +exc; ld r29,8(4) +exc; std r25,656(3) +exc; std r24,528(3) +exc; std r23,400(3) +exc; std r10,272(3) +exc; std r8,144(3) +exc; std r6,16(3) +exc; ld r22,656(4) +exc; ld r21,528(4) +exc; ld r20,400(4) +exc; ld r11,272(4) +exc; ld r9,144(4) +exc; ld r7,16(4) +exc; std r28,664(3) +exc; std r27,536(3) +exc; std r26,408(3) +exc; std r31,280(3) +exc; std r30,152(3) +exc; stdu r29,24(3) +exc; ld r25,664(4) +exc; ld r24,536(4) +exc; ld r23,408(4) +exc; ld r10,280(4) +exc; ld r8,152(4) +exc; ldu r6,24(4) bdnz 1b -68: std r22,648(3) -69: std r21,520(3) -70: std r20,392(3) -71: std r11,264(3) -72: std r9,136(3) -73: std r7,8(3) -74: addi r4,r4,640 -75: addi r3,r3,648 +exc; std r22,648(3) +exc; std r21,520(3) +exc; std r20,392(3) +exc; std r11,264(3) +exc; std r9,136(3) +exc; std r7,8(3) + addi r4,r4,640 + addi r3,r3,648 bge 0b mtctr r5 -76: ld r7,0(4) -77: ld r8,8(4) -78: ldu r9,16(4) +exc; ld r7,0(4) +exc; ld r8,8(4) +exc; ldu r9,16(4) 3: -79: ld r10,8(4) -80: std r7,8(3) -81: ld r7,16(4) -82: std r8,16(3) -83: ld r8,24(4) -84: std r9,24(3) -85: ldu r9,32(4) -86: stdu r10,32(3) +exc; ld r10,8(4) +exc; std r7,8(3) +exc; ld r7,16(4) +exc; std r8,16(3) +exc; ld r8,24(4) +exc; std r9,24(3) +exc; ldu r9,32(4) +exc; stdu r10,32(3) bdnz 3b 4: -87: ld r10,8(4) -88: std r7,8(3) -89: std r8,16(3) -90: std r9,24(3) -91: std r10,32(3) +exc; ld r10,8(4) +exc; std r7,8(3) +exc; std r8,16(3) +exc; std r9,24(3) +exc; std r10,32(3) 9: ld r20,-120(1) ld r21,-112(1) ld r22,-104(1) @@ -552,7 +524,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) * on an exception, reset to the beginning and jump back into the * standard __copy_tofrom_user */ -100: ld r20,-120(1) +.Labort: + ld r20,-120(1) ld r21,-112(1) ld r22,-104(1) ld r23,-96(1) @@ -568,78 +541,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) ld r4,-16(r1) li r5,4096 b .Ldst_aligned - - EX_TABLE(20b,100b) - EX_TABLE(21b,100b) - EX_TABLE(22b,100b) - EX_TABLE(23b,100b) - EX_TABLE(24b,100b) - EX_TABLE(25b,100b) - EX_TABLE(26b,100b) - EX_TABLE(27b,100b) - EX_TABLE(28b,100b) - EX_TABLE(29b,100b) - EX_TABLE(30b,100b) - EX_TABLE(31b,100b) - EX_TABLE(32b,100b) - EX_TABLE(33b,100b) - EX_TABLE(34b,100b) - EX_TABLE(35b,100b) - EX_TABLE(36b,100b) - EX_TABLE(37b,100b) - EX_TABLE(38b,100b) - EX_TABLE(39b,100b) - EX_TABLE(40b,100b) - EX_TABLE(41b,100b) - EX_TABLE(42b,100b) - EX_TABLE(43b,100b) - EX_TABLE(44b,100b) - EX_TABLE(45b,100b) - EX_TABLE(46b,100b) - EX_TABLE(47b,100b) - EX_TABLE(48b,100b) - EX_TABLE(49b,100b) - EX_TABLE(50b,100b) - EX_TABLE(51b,100b) - EX_TABLE(52b,100b) - EX_TABLE(53b,100b) - EX_TABLE(54b,100b) - EX_TABLE(55b,100b) - EX_TABLE(56b,100b) - EX_TABLE(57b,100b) - EX_TABLE(58b,100b) - EX_TABLE(59b,100b) - EX_TABLE(60b,100b) - EX_TABLE(61b,100b) - EX_TABLE(62b,100b) - EX_TABLE(63b,100b) - EX_TABLE(64b,100b) - EX_TABLE(65b,100b) - EX_TABLE(66b,100b) - EX_TABLE(67b,100b) - EX_TABLE(68b,100b) - EX_TABLE(69b,100b) - EX_TABLE(70b,100b) - EX_TABLE(71b,100b) - EX_TABLE(72b,100b) - EX_TABLE(73b,100b) - EX_TABLE(74b,100b) - EX_TABLE(75b,100b) - EX_TABLE(76b,100b) - EX_TABLE(77b,100b) - EX_TABLE(78b,100b) - EX_TABLE(79b,100b) - EX_TABLE(80b,100b) - EX_TABLE(81b,100b) - EX_TABLE(82b,100b) - EX_TABLE(83b,100b) - EX_TABLE(84b,100b) - EX_TABLE(85b,100b) - EX_TABLE(86b,100b) - EX_TABLE(87b,100b) - EX_TABLE(88b,100b) - EX_TABLE(89b,100b) - EX_TABLE(90b,100b) - EX_TABLE(91b,100b) - EXPORT_SYMBOL(__copy_tofrom_user) -- cgit From 98c45f51f767bfdd71d773cceaceb403352e51ae Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Aug 2018 20:13:04 +1000 Subject: selftests/powerpc/64: Test all paths through copy routines The hand-coded assembler 64-bit copy routines include feature sections that select one code path or another depending on which CPU we are executing on. The self-tests for these copy routines end up testing just one path. This adds a mechanism for selecting any desired code path at compile time, and makes 2 or 3 versions of each test, each using a different code path, so as to cover all the possible paths. Signed-off-by: Paul Mackerras [mpe: Add -mcpu=power4 to CFLAGS for older compilers] Signed-off-by: Michael Ellerman --- arch/powerpc/lib/copyuser_64.S | 7 +++++ arch/powerpc/lib/copyuser_power7.S | 21 ++++++------- arch/powerpc/lib/memcpy_64.S | 9 ++++-- arch/powerpc/lib/memcpy_power7.S | 22 +++++++------- .../testing/selftests/powerpc/copyloops/.gitignore | 14 ++++++--- tools/testing/selftests/powerpc/copyloops/Makefile | 35 ++++++++++++++++++---- .../selftests/powerpc/copyloops/asm/ppc_asm.h | 21 +++++++------ 7 files changed, 85 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 8c5033c85e37..2197a35097c5 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -12,6 +12,11 @@ #include #include +#ifndef SELFTEST_CASE +/* 0 == most CPUs, 1 == POWER6, 2 == Cell */ +#define SELFTEST_CASE 0 +#endif + #ifdef __BIG_ENDIAN__ #define sLd sld /* Shift towards low-numbered address. */ #define sHd srd /* Shift towards high-numbered address. */ @@ -73,6 +78,7 @@ _GLOBAL(__copy_tofrom_user_base) * At the time of writing the only CPU that has this combination of bits * set is Power6. */ +test_feature = (SELFTEST_CASE == 1) BEGIN_FTR_SECTION nop FTR_SECTION_ELSE @@ -82,6 +88,7 @@ ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \ .Ldst_aligned: addi r3,r3,-16 r3_offset = 16 +test_feature = (SELFTEST_CASE == 0) BEGIN_FTR_SECTION andi. r0,r4,7 bne .Lsrc_unaligned diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 215e4760c09f..1a1fe180af62 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -19,6 +19,11 @@ */ #include +#ifndef SELFTEST_CASE +/* 0 == don't use VMX, 1 == use VMX */ +#define SELFTEST_CASE 0 +#endif + #ifdef __BIG_ENDIAN__ #define LVS(VRT,RA,RB) lvsl VRT,RA,RB #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC @@ -80,7 +85,6 @@ _GLOBAL(__copy_tofrom_user_power7) -#ifdef CONFIG_ALTIVEC cmpldi r5,16 cmpldi cr1,r5,3328 @@ -89,15 +93,12 @@ _GLOBAL(__copy_tofrom_user_power7) std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) blt .Lshort_copy - bge cr1,.Lvmx_copy -#else - cmpldi r5,16 - std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) - std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) - - blt .Lshort_copy +#ifdef CONFIG_ALTIVEC +test_feature = SELFTEST_CASE +BEGIN_FTR_SECTION + bgt cr1,.Lvmx_copy +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif .Lnonvmx_copy: @@ -278,8 +279,8 @@ err1; stb r0,0(r3) addi r1,r1,STACKFRAMESIZE b .Lnonvmx_copy -#ifdef CONFIG_ALTIVEC .Lvmx_copy: +#ifdef CONFIG_ALTIVEC mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 94650d6eae9c..273ea67e60a1 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -12,6 +12,11 @@ #include #include +#ifndef SELFTEST_CASE +/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */ +#define SELFTEST_CASE 0 +#endif + .align 7 _GLOBAL_TOC(memcpy) BEGIN_FTR_SECTION @@ -22,10 +27,8 @@ BEGIN_FTR_SECTION #endif FTR_SECTION_ELSE #ifdef CONFIG_PPC_BOOK3S_64 -#ifndef SELFTEST b memcpy_power7 #endif -#endif ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) #ifdef __LITTLE_ENDIAN__ /* dumb little-endian memcpy that will get replaced at runtime */ @@ -49,6 +52,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) cleared. At the time of writing the only CPU that has this combination of bits set is Power6. */ +test_feature = (SELFTEST_CASE == 1) BEGIN_FTR_SECTION nop FTR_SECTION_ELSE @@ -57,6 +61,7 @@ ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \ CPU_FTR_UNALIGNED_LD_STD) .Ldst_aligned: addi r3,r3,-16 +test_feature = (SELFTEST_CASE == 0) BEGIN_FTR_SECTION andi. r0,r4,7 bne .Lsrc_unaligned diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 070cdf6f584f..89bfefcf7fcc 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -19,7 +19,10 @@ */ #include -_GLOBAL(memcpy_power7) +#ifndef SELFTEST_CASE +/* 0 == don't use VMX, 1 == use VMX */ +#define SELFTEST_CASE 0 +#endif #ifdef __BIG_ENDIAN__ #define LVS(VRT,RA,RB) lvsl VRT,RA,RB @@ -29,20 +32,17 @@ _GLOBAL(memcpy_power7) #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC #endif -#ifdef CONFIG_ALTIVEC +_GLOBAL(memcpy_power7) cmpldi r5,16 cmpldi cr1,r5,4096 - std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - blt .Lshort_copy - bgt cr1,.Lvmx_copy -#else - cmpldi r5,16 - - std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) - blt .Lshort_copy +#ifdef CONFIG_ALTIVEC +test_feature = SELFTEST_CASE +BEGIN_FTR_SECTION + bgt cr1, .Lvmx_copy +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif .Lnonvmx_copy: @@ -223,8 +223,8 @@ _GLOBAL(memcpy_power7) addi r1,r1,STACKFRAMESIZE b .Lnonvmx_copy -#ifdef CONFIG_ALTIVEC .Lvmx_copy: +#ifdef CONFIG_ALTIVEC mflr r0 std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 25a192f62c4d..bc6c4ba153af 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -1,4 +1,10 @@ -copyuser_64 -copyuser_power7 -memcpy_64 -memcpy_power7 +copyuser_64_t0 +copyuser_64_t1 +copyuser_64_t2 +copyuser_power7_t0 +copyuser_power7_t1 +memcpy_64_t0 +memcpy_64_t1 +memcpy_64_t2 +memcpy_power7_t0 +memcpy_power7_t1 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index eedce3366f64..498d8d875ab8 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -4,18 +4,41 @@ CFLAGS += -m64 CFLAGS += -I$(CURDIR) CFLAGS += -D SELFTEST CFLAGS += -maltivec +CFLAGS += -mcpu=power4 # Use our CFLAGS for the implicit .S rule & set the asm machine type ASFLAGS = $(CFLAGS) -Wa,-mpower4 -TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 +TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ + copyuser_p7_t0 copyuser_p7_t1 \ + memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ + memcpy_p7_t0 memcpy_p7_t1 + EXTRA_SOURCES := validate.c ../harness.c include ../../lib.mk -$(OUTPUT)/copyuser_64: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base -$(OUTPUT)/copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7 -$(OUTPUT)/memcpy_64: CPPFLAGS += -D COPY_LOOP=test_memcpy -$(OUTPUT)/memcpy_power7: CPPFLAGS += -D COPY_LOOP=test_memcpy_power7 +$(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES) + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test___copy_tofrom_user_base \ + -D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \ + -o $@ $^ + +$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES) + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test___copy_tofrom_user_power7 \ + -D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \ + -o $@ $^ + +# Strictly speaking, we only need the memcpy_64 test cases for big-endian +$(OUTPUT)/memcpy_64_t%: memcpy_64.S $(EXTRA_SOURCES) + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test_memcpy \ + -D SELFTEST_CASE=$(subst memcpy_64_t,,$(notdir $@)) \ + -o $@ $^ -$(TEST_GEN_PROGS): $(EXTRA_SOURCES) +$(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES) + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test_memcpy_power7 \ + -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \ + -o $@ $^ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index dfce161aebfc..91bc40392eb3 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -43,17 +43,16 @@ FUNC_START(enter_vmx_ops) FUNC_START(exit_vmx_ops) blr -FUNC_START(memcpy_power7) - blr - -FUNC_START(__copy_tofrom_user_power7) - blr - FUNC_START(__copy_tofrom_user_base) blr -#define BEGIN_FTR_SECTION -#define FTR_SECTION_ELSE -#define ALT_FTR_SECTION_END_IFCLR(x) -#define ALT_FTR_SECTION_END(x, y) -#define END_FTR_SECTION_IFCLR(x) +#define BEGIN_FTR_SECTION .if test_feature +#define FTR_SECTION_ELSE .else +#define ALT_FTR_SECTION_END_IFCLR(x) .endif +#define ALT_FTR_SECTION_END_IFSET(x) .endif +#define ALT_FTR_SECTION_END(x, y) .endif +#define END_FTR_SECTION_IFCLR(x) .endif +#define END_FTR_SECTION_IFSET(x) .endif + +/* Default to taking the first of any alternative feature sections */ +test_feature = 1 -- cgit From 2679f63fe5e4bb1459a0c20859a99c70c15d5ea1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 3 Aug 2018 20:13:05 +1000 Subject: selftests/powerpc/64: Test exception cases in copy_tofrom_user This adds a set of test cases to test the behaviour of copy_tofrom_user when exceptions are encountered accessing the source or destination. Currently, copy_tofrom_user does not always copy as many bytes as possible when an exception occurs on a store to the destination, and that is reflected in failures in these tests. Based on a test program from Anton Blanchard. [paulus@ozlabs.org - test all three paths, wrote commit description, made EX_TABLE create an exception table.] Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- .../testing/selftests/powerpc/copyloops/.gitignore | 3 + tools/testing/selftests/powerpc/copyloops/Makefile | 12 +- .../selftests/powerpc/copyloops/asm/ppc_asm.h | 27 ++--- .../powerpc/copyloops/copy_tofrom_user_reference.S | 24 ++++ .../selftests/powerpc/copyloops/exc_validate.c | 124 +++++++++++++++++++++ tools/testing/selftests/powerpc/copyloops/stubs.S | 19 ++++ 6 files changed, 188 insertions(+), 21 deletions(-) create mode 100644 tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S create mode 100644 tools/testing/selftests/powerpc/copyloops/exc_validate.c create mode 100644 tools/testing/selftests/powerpc/copyloops/stubs.S diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index bc6c4ba153af..ce12cd0e2967 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -8,3 +8,6 @@ memcpy_64_t1 memcpy_64_t2 memcpy_power7_t0 memcpy_power7_t1 +copyuser_64_exc_t0 +copyuser_64_exc_t1 +copyuser_64_exc_t2 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 498d8d875ab8..1cf89a34d97c 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -12,9 +12,10 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ - memcpy_p7_t0 memcpy_p7_t1 + memcpy_p7_t0 memcpy_p7_t1 \ + copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 -EXTRA_SOURCES := validate.c ../harness.c +EXTRA_SOURCES := validate.c ../harness.c stubs.S include ../../lib.mk @@ -42,3 +43,10 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES) -D COPY_LOOP=test_memcpy_power7 \ -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \ -o $@ $^ + +$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ + copy_tofrom_user_reference.S stubs.S + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test___copy_tofrom_user_base \ + -D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \ + -o $@ $^ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index 91bc40392eb3..0605df807593 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SELFTESTS_POWERPC_PPC_ASM_H +#define __SELFTESTS_POWERPC_PPC_ASM_H #include #define CONFIG_ALTIVEC @@ -26,25 +28,10 @@ #define PPC_MTOCRF(A, B) mtocrf A, B -#define EX_TABLE(x, y) - -FUNC_START(enter_vmx_usercopy) - li r3,1 - blr - -FUNC_START(exit_vmx_usercopy) - li r3,0 - blr - -FUNC_START(enter_vmx_ops) - li r3,1 - blr - -FUNC_START(exit_vmx_ops) - blr - -FUNC_START(__copy_tofrom_user_base) - blr +#define EX_TABLE(x, y) \ + .section __ex_table,"a"; \ + .8byte x, y; \ + .previous #define BEGIN_FTR_SECTION .if test_feature #define FTR_SECTION_ELSE .else @@ -56,3 +43,5 @@ FUNC_START(__copy_tofrom_user_base) /* Default to taking the first of any alternative feature sections */ test_feature = 1 + +#endif /* __SELFTESTS_POWERPC_PPC_ASM_H */ diff --git a/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S new file mode 100644 index 000000000000..3363b86407d6 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S @@ -0,0 +1,24 @@ +#include + +_GLOBAL(copy_tofrom_user_reference) + cmpdi r5,0 + beq 4f + + mtctr r5 + +1: lbz r6,0(r4) +2: stb r6,0(r3) + addi r3,r3,1 + addi r4,r4,1 + bdnz 1b + +3: mfctr r3 + blr + +4: mr r3,r5 + blr + +.section __ex_table,"a" + .llong 1b,3b + .llong 2b,3b +.text diff --git a/tools/testing/selftests/powerpc/copyloops/exc_validate.c b/tools/testing/selftests/powerpc/copyloops/exc_validate.c new file mode 100644 index 000000000000..c896ea9a763c --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/exc_validate.c @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +extern char __start___ex_table[]; +extern char __stop___ex_table[]; + +#if defined(__powerpc64__) +#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] +#elif defined(__powerpc__) +#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP] +#else +#error implement UCONTEXT_NIA +#endif + +static void segv_handler(int signr, siginfo_t *info, void *ptr) +{ + ucontext_t *uc = (ucontext_t *)ptr; + unsigned long addr = (unsigned long)info->si_addr; + unsigned long *ip = &UCONTEXT_NIA(uc); + unsigned long *ex_p = (unsigned long *)__start___ex_table; + + while (ex_p < (unsigned long *)__stop___ex_table) { + unsigned long insn, fixup; + + insn = *ex_p++; + fixup = *ex_p++; + + if (insn == *ip) { + *ip = fixup; + return; + } + } + + printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr); + abort(); +} + +static void setup_segv_handler(void) +{ + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_sigaction = segv_handler; + action.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &action, NULL); +} + +unsigned long COPY_LOOP(void *to, const void *from, unsigned long size); +unsigned long test_copy_tofrom_user_reference(void *to, const void *from, unsigned long size); + +static int total_passed; +static int total_failed; + +static void do_one_test(char *dstp, char *srcp, unsigned long len) +{ + unsigned long got, expected; + + got = COPY_LOOP(dstp, srcp, len); + expected = test_copy_tofrom_user_reference(dstp, srcp, len); + + if (got != expected) { + total_failed++; + printf("FAIL from=%p to=%p len=%ld returned %ld, expected %ld\n", + srcp, dstp, len, got, expected); + //abort(); + } else + total_passed++; +} + +//#define MAX_LEN 512 +#define MAX_LEN 16 + +int test_copy_exception(void) +{ + int page_size; + static char *p, *q; + unsigned long src, dst, len; + + page_size = getpagesize(); + p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + + if (p == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + memset(p, 0, page_size); + + setup_segv_handler(); + + if (mprotect(p + page_size, page_size, PROT_NONE)) { + perror("mprotect"); + exit(1); + } + + q = p + page_size - MAX_LEN; + + for (src = 0; src < MAX_LEN; src++) { + for (dst = 0; dst < MAX_LEN; dst++) { + for (len = 0; len < MAX_LEN+1; len++) { + // printf("from=%p to=%p len=%ld\n", q+dst, q+src, len); + do_one_test(q+dst, q+src, len); + } + } + } + + printf("Totals:\n"); + printf(" Pass: %d\n", total_passed); + printf(" Fail: %d\n", total_failed); + + return 0; +} + +int main(void) +{ + return test_harness(test_copy_exception, str(COPY_LOOP)); +} diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S new file mode 100644 index 000000000000..ec8bcf2bf1c2 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/stubs.S @@ -0,0 +1,19 @@ +#include + +FUNC_START(enter_vmx_usercopy) + li r3,1 + blr + +FUNC_START(exit_vmx_usercopy) + li r3,0 + blr + +FUNC_START(enter_vmx_ops) + li r3,1 + blr + +FUNC_START(exit_vmx_ops) + blr + +FUNC_START(__copy_tofrom_user_base) + blr -- cgit From f8db2007ff5838aff696bd4297eefcc77af2cf46 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Aug 2018 20:13:06 +1000 Subject: powerpc/64: Copy as much as possible in __copy_tofrom_user In __copy_tofrom_user, if we encounter an exception on a store, we stop copying and return the number of bytes not copied. However, if the store is wider than one byte and is to an unaligned address, it is possible that the store operand overlaps a page boundary and the exception occurred on the latter part of the store operand, meaning that it would be possible to copy a few more bytes. Since copy_to_user is generally expected to copy as much as possible, it would be better to copy those extra few bytes. This adds code to do that. Since this edge case is not performance-critical, the code has been written to be compact rather than as fast as possible. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/lib/copyuser_64.S | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 2197a35097c5..96c514bee66b 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -379,8 +379,8 @@ stex; stb r0,0(r3) blr /* - * exception handlers for stores: we just need to work - * out how many bytes weren't copied + * exception handlers for stores: we need to work out how many bytes + * weren't copied, and we may need to copy some more. * Note that the number of bytes of instructions for adjusting r3 needs * to equal the amount of the adjustment, due to the trick of using * .Lst_exc - r3_offset as the handler address. @@ -400,10 +400,27 @@ stex; stb r0,0(r3) /* adjust by 4 */ addi r3,r3,4 .Lst_exc: - ld r6,-24(r1) - ld r5,-8(r1) - add r6,r6,r5 - subf r3,r3,r6 /* #bytes not copied in r3 */ + ld r6,-24(r1) /* original destination pointer */ + ld r4,-16(r1) /* original source pointer */ + ld r5,-8(r1) /* original number of bytes */ + add r7,r6,r5 + /* + * If the destination pointer isn't 8-byte aligned, + * we may have got the exception as a result of a + * store that overlapped a page boundary, so we may be + * able to copy a few more bytes. + */ +17: andi. r0,r3,7 + beq 19f + subf r8,r6,r3 /* #bytes copied */ +100: EX_TABLE(100b,19f) + lbzx r0,r8,r4 +100: EX_TABLE(100b,19f) + stb r0,0(r3) + addi r3,r3,1 + cmpld r3,r7 + blt 17b +19: subf r3,r3,r7 /* #bytes not copied in r3 */ blr /* -- cgit From badf436f6fa5dc864d579d73fe75386b743470c0 Mon Sep 17 00:00:00 2001 From: "Rodrigo R. Galvao" Date: Mon, 6 Aug 2018 13:42:03 -0300 Subject: powerpc/Makefiles: Convert ifeq to ifdef where possible In Makefiles if we're testing a CONFIG_FOO symbol for equality with 'y' we can instead just use ifdef. The latter reads easily, so convert to it where possible. Signed-off-by: Rodrigo R. Galvao Reviewed-by: Mauro S. M. Rodrigues Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 26 +++++++++++++------------- arch/powerpc/boot/Makefile | 2 +- arch/powerpc/kernel/Makefile | 14 ++++++++------ arch/powerpc/mm/Makefile | 4 ++-- arch/powerpc/net/Makefile | 2 +- arch/powerpc/platforms/52xx/Makefile | 2 +- arch/powerpc/platforms/cell/Makefile | 2 +- arch/powerpc/platforms/pseries/Makefile | 2 +- arch/powerpc/sysdev/Makefile | 2 +- 9 files changed, 29 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 48e887f03a6c..ccb64750eb6a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -36,7 +36,7 @@ else KBUILD_DEFCONFIG := ppc64_defconfig endif -ifeq ($(CONFIG_PPC64),y) +ifdef CONFIG_PPC64 new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) ifeq ($(new_nm),y) @@ -74,7 +74,7 @@ KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o endif endif -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) +ifdef CONFIG_CPU_LITTLE_ENDIAN KBUILD_CFLAGS += -mlittle-endian LDFLAGS += -EL LDEMULATION := lppc @@ -117,7 +117,7 @@ LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn) -ifeq ($(CONFIG_PPC64),y) +ifdef CONFIG_PPC64 ifeq ($(call cc-option-yn,-mcmodel=medium),y) # -mcmodel=medium breaks modules because it uses 32bit offsets from # the TOC pointer to create pointers where possible. Pointers into the @@ -134,7 +134,7 @@ endif endif CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no) -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) +ifdef CONFIG_CPU_LITTLE_ENDIAN CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else @@ -148,8 +148,8 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) -ifeq ($(CONFIG_PPC_BOOK3S_64),y) -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) +ifdef CONFIG_PPC_BOOK3S_64 +ifdef CONFIG_CPU_LITTLE_ENDIAN CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8 CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8) else @@ -173,7 +173,7 @@ CFLAGS-$(CONFIG_POWER9_CPU) += $(call cc-option,-mcpu=power9) CFLAGS-$(CONFIG_PPC_8xx) += $(call cc-option,-mcpu=860) # Altivec option not allowed with e500mc64 in GCC. -ifeq ($(CONFIG_ALTIVEC),y) +ifdef CONFIG_ALTIVEC E5500_CPU := -mcpu=powerpc64 else E5500_CPU := $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64) @@ -181,8 +181,8 @@ endif CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU) CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU)) -ifeq ($(CONFIG_PPC32),y) -ifeq ($(CONFIG_PPC_E500MC),y) +ifdef CONFIG_PPC32 +ifdef CONFIG_PPC_E500MC CFLAGS-y += $(call cc-option,-mcpu=e500mc,-mcpu=powerpc) else CFLAGS-$(CONFIG_E500) += $(call cc-option,-mcpu=8540 -msoft-float,-mcpu=powerpc) @@ -204,7 +204,7 @@ else CHECKFLAGS += -D__LITTLE_ENDIAN__ endif -ifeq ($(CONFIG_476FPE_ERR46),y) +ifdef CONFIG_476FPE_ERR46 KBUILD_LDFLAGS_MODULE += --ppc476-workaround \ -T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds endif @@ -231,12 +231,12 @@ KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -ifeq ($(CONFIG_6xx),y) +ifdef CONFIG_6xx KBUILD_CFLAGS += -mcpu=powerpc endif # Work around a gcc code-gen bug with -fno-omit-frame-pointer. -ifeq ($(CONFIG_FUNCTION_TRACER),y) +ifdef CONFIG_FUNCTION_TRACER KBUILD_CFLAGS += -mno-sched-epilog endif @@ -395,7 +395,7 @@ install: $(Q)$(MAKE) $(build)=$(boot) install vdso_install: -ifeq ($(CONFIG_PPC64),y) +ifdef CONFIG_PPC64 $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ endif $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index deea20c334df..0fb96c26136f 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -354,7 +354,7 @@ image-$(CONFIG_AMIGAONE) += cuImage.amigaone # For 32-bit powermacs, build the COFF and miboot images # as well as the ELF images. -ifeq ($(CONFIG_PPC32),y) +ifdef CONFIG_PPC32 image-$(CONFIG_PPC_PMAC) += zImage.coff zImage.miboot endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index dbe2cf04b406..3b66f2c19c84 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -7,10 +7,10 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) +ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif -ifeq ($(CONFIG_PPC32),y) +ifdef CONFIG_PPC32 CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif @@ -63,13 +63,13 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_FA_DUMP) += fadump.o -ifeq ($(CONFIG_PPC32),y) +ifdef CONFIG_PPC32 obj-$(CONFIG_E500) += idle_e500.o endif obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o -ifeq ($(CONFIG_FSL_BOOKE),y) +ifdef CONFIG_FSL_BOOKE obj-$(CONFIG_HIBERNATION) += swsusp_booke.o else obj-$(CONFIG_HIBERNATION) += swsusp_$(BITS).o @@ -110,9 +110,11 @@ obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o -ifeq ($(CONFIG_HAVE_IMA_KEXEC)$(CONFIG_IMA),yy) +ifdef CONFIG_HAVE_IMA_KEXEC +ifdef CONFIG_IMA obj-y += ima_kexec.o endif +endif obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o @@ -165,7 +167,7 @@ PHONY += systbl_chk systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i $(call cmd,systbl_chk) -ifeq ($(CONFIG_PPC_OF_BOOT_TRAMPOLINE),y) +ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE $(obj)/built-in.a: prom_init_check quiet_cmd_prom_init_check = CALL $< diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index f06f3577d8d1..cdf6a9960046 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o -ifeq ($(CONFIG_PPC_BOOK3S_64),y) +ifdef CONFIG_PPC_BOOK3S_64 obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o endif @@ -31,7 +31,7 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-y += hugetlbpage.o -ifeq ($(CONFIG_HUGETLB_PAGE),y) +ifdef CONFIG_HUGETLB_PAGE obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 809f019d3cba..c2dec3a68d4c 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,7 +2,7 @@ # # Arch-specific network modules # -ifeq ($(CONFIG_PPC64),y) +ifdef CONFIG_PPC64 obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o else obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile index ff2f86fe5429..f40d48eab779 100644 --- a/arch/powerpc/platforms/52xx/Makefile +++ b/arch/powerpc/platforms/52xx/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_PPC_LITE5200) += lite5200.o obj-$(CONFIG_PPC_MEDIA5200) += media5200.o obj-$(CONFIG_PM) += mpc52xx_sleep.o mpc52xx_pm.o -ifeq ($(CONFIG_PPC_LITE5200),y) +ifdef CONFIG_PPC_LITE5200 obj-$(CONFIG_PM) += lite5200_sleep.o lite5200_pm.o endif diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index d5f808e8a5f3..10064a33ca96 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o -ifeq ($(CONFIG_SMP),y) +ifdef CONFIG_SMP obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o endif diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 13eede60c24d..7e89d5c47068 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -25,6 +25,6 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o -ifeq ($(CONFIG_PPC_PSERIES),y) +ifdef CONFIG_PPC_PSERIES obj-$(CONFIG_SUSPEND) += suspend.o endif diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index ea2f595b5133..f730539074c4 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -48,7 +48,7 @@ obj-$(CONFIG_UCODE_PATCH) += micropatch.o obj-$(CONFIG_PPC_MPC512x) += mpc5xxx_clocks.o obj-$(CONFIG_PPC_MPC52xx) += mpc5xxx_clocks.o -ifeq ($(CONFIG_SUSPEND),y) +ifdef CONFIG_SUSPEND obj-$(CONFIG_6xx) += 6xx-suspend.o endif -- cgit From cc62d20ce4ca4fe73a09d571144b29d43478cb9c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 7 Jun 2018 10:10:18 +0000 Subject: powerpc: Make CPU selection logic generic in Makefile At the time being, when adding a new CPU for selection, both Kconfig.cputype and Makefile have to be modified. This patch moves into Kconfig.cputype the name of the CPU to me passed to the -mcpu= argument. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin [mpe: Rename the option to TARGET_CPU to echo the gcc documentation] Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 8 +------- arch/powerpc/platforms/Kconfig.cputype | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index ccb64750eb6a..828efa7c2a35 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -164,13 +164,7 @@ ifdef CONFIG_MPROFILE_KERNEL CC_FLAGS_FTRACE := -pg -mprofile-kernel endif -CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell) -CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) -CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6) -CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7) -CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8) -CFLAGS-$(CONFIG_POWER9_CPU) += $(call cc-option,-mcpu=power9) -CFLAGS-$(CONFIG_PPC_8xx) += $(call cc-option,-mcpu=860) +CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) # Altivec option not allowed with e500mc64 in GCC. ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e93d6186de6e..8e2bcd36ae1c 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -139,6 +139,21 @@ config E6500_CPU endchoice +config TARGET_CPU_BOOL + bool + default !GENERIC_CPU + +config TARGET_CPU + string + depends on TARGET_CPU_BOOL + default "cell" if CELL_CPU + default "power5" if POWER5_CPU + default "power6" if POWER6_CPU + default "power7" if POWER7_CPU + default "power8" if POWER8_CPU + default "power9" if POWER9_CPU + default "860" if PPC_8xx + config PPC_BOOK3S def_bool y depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 -- cgit From 0e00a8c9fd92409afa107aaa05f8465a76856080 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 7 Jun 2018 10:10:20 +0000 Subject: powerpc: Allow CPU selection also on PPC32 This patch extends to PPC32 the capability to select the exact CPU type. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/Kconfig.cputype | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 8e2bcd36ae1c..165a64f884d8 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -85,7 +85,6 @@ endchoice choice prompt "CPU selection" - depends on PPC64 default GENERIC_CPU help This will create a kernel which is optimised for a particular CPU. @@ -95,13 +94,17 @@ choice config GENERIC_CPU bool "Generic (POWER4 and above)" - depends on !CPU_LITTLE_ENDIAN + depends on PPC64 && !CPU_LITTLE_ENDIAN config GENERIC_CPU bool "Generic (POWER8 and above)" - depends on CPU_LITTLE_ENDIAN + depends on PPC64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER +config GENERIC_CPU + bool "Generic 32 bits powerpc" + depends on PPC32 && !PPC_8xx + config CELL_CPU bool "Cell Broadband Engine" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN @@ -137,6 +140,10 @@ config E6500_CPU bool "Freescale e6500" depends on E500 +config 860_CPU + bool "8xx family" + depends on PPC_8xx + endchoice config TARGET_CPU_BOOL @@ -152,7 +159,7 @@ config TARGET_CPU default "power7" if POWER7_CPU default "power8" if POWER8_CPU default "power9" if POWER9_CPU - default "860" if PPC_8xx + default "860" if 860_CPU config PPC_BOOK3S def_bool y -- cgit From d6690b1a9b0dd95a6fbd166d9657e0cc80afbf99 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 7 Jun 2018 10:10:22 +0000 Subject: powerpc: Allow CPU selection of e300core variants GCC supports -mcpu=e300c2 and -mcpu=e300c3 This patch gives the opportunity to tune kernel to one of those two types. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/Kconfig.cputype | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 165a64f884d8..6c6a7c72cae4 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -144,6 +144,14 @@ config 860_CPU bool "8xx family" depends on PPC_8xx +config E300C2_CPU + bool "e300c2 (832x)" + depends on PPC_BOOK3S_32 + +config E300C3_CPU + bool "e300c3 (831x)" + depends on PPC_BOOK3S_32 + endchoice config TARGET_CPU_BOOL @@ -160,6 +168,8 @@ config TARGET_CPU default "power8" if POWER8_CPU default "power9" if POWER9_CPU default "860" if 860_CPU + default "e300c2" if E300C2_CPU + default "e300c3" if E300C3_CPU config PPC_BOOK3S def_bool y -- cgit From 77b5f703dcc859915f0f20d92bc538e4a99ef149 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Apr 2018 17:16:10 +1000 Subject: powerpc/powernv/opal: Use standard interrupts property when available For (bad) historical reasons, OPAL used to create a non-standard pair of properties "opal-interrupts" and "opal-interrupts-names" for representing the list of interrupts it wants Linux to request on its behalf. Among other issues, the opal-interrupts doesn't have a way to carry the type of interrupts, and they were assumed to be all level sensitive. This is wrong on some recent systems where some of them are edge sensitive causing warnings in the XIVE code and possible misbehaviours if they need to be retriggered (typically the NPU2 TCE error interrupts). This makes Linux switch to using the standard "interrupts" and "interrupt-names" properties instead when they are available, using standard of_irq helpers, which can carry all the desired type information. Newer versions of OPAL will generate those properties in addition to the legacy ones. Signed-off-by: Benjamin Herrenschmidt [mpe: Fixup prefix logic to check strlen(r->name). Reinstate setting of start = 0 in opal_event_shutdown() to avoid double free warnings] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-irqchip.c | 126 +++++++++++++++----------- 1 file changed, 75 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 605c7e5d52c2..bc97770a67db 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -38,8 +39,8 @@ struct opal_event_irqchip { }; static struct opal_event_irqchip opal_event_irqchip; static u64 last_outstanding_events; -static unsigned int opal_irq_count; -static unsigned int *opal_irqs; +static int opal_irq_count; +static struct resource *opal_irqs; void opal_handle_events(void) { @@ -165,24 +166,23 @@ void opal_event_shutdown(void) /* First free interrupts, which will also mask them */ for (i = 0; i < opal_irq_count; i++) { - if (!opal_irqs[i]) + if (!opal_irqs || !opal_irqs[i].start) continue; if (in_interrupt() || irqs_disabled()) - disable_irq_nosync(opal_irqs[i]); + disable_irq_nosync(opal_irqs[i].start); else - free_irq(opal_irqs[i], NULL); + free_irq(opal_irqs[i].start, NULL); - opal_irqs[i] = 0; + opal_irqs[i].start = 0; } } int __init opal_event_init(void) { struct device_node *dn, *opal_node; - const char **names; - u32 *irqs; - int i, rc; + bool old_style = false; + int i, rc = 0; opal_node = of_find_node_by_path("/ibm,opal"); if (!opal_node) { @@ -207,67 +207,91 @@ int __init opal_event_init(void) goto out; } - /* Get opal-interrupts property and names if present */ - rc = of_property_count_u32_elems(opal_node, "opal-interrupts"); - if (rc < 0) - goto out; + /* Look for new-style (standard) "interrupts" property */ + opal_irq_count = of_irq_count(opal_node); - opal_irq_count = rc; - pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); + /* Absent ? Look for the old one */ + if (opal_irq_count < 1) { + /* Get opal-interrupts property and names if present */ + rc = of_property_count_u32_elems(opal_node, "opal-interrupts"); + if (rc > 0) + opal_irq_count = rc; + old_style = true; + } - irqs = kcalloc(opal_irq_count, sizeof(*irqs), GFP_KERNEL); - names = kcalloc(opal_irq_count, sizeof(*names), GFP_KERNEL); - opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL); + /* No interrupts ? Bail out */ + if (!opal_irq_count) + goto out; - if (WARN_ON(!irqs || !names || !opal_irqs)) - goto out_free; + pr_debug("OPAL: Found %d interrupts reserved for OPAL using %s scheme\n", + opal_irq_count, old_style ? "old" : "new"); - rc = of_property_read_u32_array(opal_node, "opal-interrupts", - irqs, opal_irq_count); - if (rc < 0) { - pr_err("Error %d reading opal-interrupts array\n", rc); - goto out_free; + /* Allocate an IRQ resources array */ + opal_irqs = kcalloc(opal_irq_count, sizeof(struct resource), GFP_KERNEL); + if (WARN_ON(!opal_irqs)) { + rc = -ENOMEM; + goto out; } - /* It's not an error for the names to be missing */ - of_property_read_string_array(opal_node, "opal-interrupts-names", - names, opal_irq_count); + /* Build the resources array */ + if (old_style) { + /* Old style "opal-interrupts" property */ + for (i = 0; i < opal_irq_count; i++) { + struct resource *r = &opal_irqs[i]; + const char *name = NULL; + u32 hw_irq; + int virq; + + rc = of_property_read_u32_index(opal_node, "opal-interrupts", + i, &hw_irq); + if (WARN_ON(rc < 0)) { + opal_irq_count = i; + break; + } + of_property_read_string_index(opal_node, "opal-interrupts-names", + i, &name); + virq = irq_create_mapping(NULL, hw_irq); + if (!virq) { + pr_warn("Failed to map OPAL irq 0x%x\n", hw_irq); + continue; + } + r->start = r->end = virq; + r->flags = IORESOURCE_IRQ | IRQ_TYPE_LEVEL_LOW; + r->name = name; + } + } else { + /* new style standard "interrupts" property */ + rc = of_irq_to_resource_table(opal_node, opal_irqs, opal_irq_count); + if (WARN_ON(rc < 0)) { + opal_irq_count = 0; + kfree(opal_irqs); + goto out; + } + if (WARN_ON(rc < opal_irq_count)) + opal_irq_count = rc; + } /* Install interrupt handlers */ for (i = 0; i < opal_irq_count; i++) { - unsigned int virq; - char *name; - - /* Get hardware and virtual IRQ */ - virq = irq_create_mapping(NULL, irqs[i]); - if (!virq) { - pr_warn("Failed to map irq 0x%x\n", irqs[i]); - continue; - } + struct resource *r = &opal_irqs[i]; + const char *name; - if (names[i] && strlen(names[i])) - name = kasprintf(GFP_KERNEL, "opal-%s", names[i]); + /* Prefix name */ + if (r->name && strlen(r->name)) + name = kasprintf(GFP_KERNEL, "opal-%s", r->name); else name = kasprintf(GFP_KERNEL, "opal"); /* Install interrupt handler */ - rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW, + rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK, name, NULL); if (rc) { - irq_dispose_mapping(virq); - pr_warn("Error %d requesting irq %d (0x%x)\n", - rc, virq, irqs[i]); + pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start); continue; } - - /* Cache IRQ */ - opal_irqs[i] = virq; } - -out_free: - kfree(irqs); - kfree(names); -out: + rc = 0; + out: of_node_put(opal_node); return rc; } -- cgit From 7c27a26e1ed5a7dd709aa19685d2c98f64e1cf0c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 7 Aug 2018 11:15:39 -0300 Subject: selftests/powerpc: Kill child processes on SIGINT There are some powerpc selftests, as tm/tm-unavailable, that run for a long period (>120 seconds), and if it is interrupted, as pressing CRTL-C (SIGINT), the foreground process (harness) dies but the child process and threads continue to execute (with PPID = 1 now) in background. In this case, you'd think the whole test exited, but there are remaining threads and processes being executed in background. Sometimes these zombies processes are doing annoying things, as consuming the whole CPU or dumping things to STDOUT. This patch fixes this problem by attaching an empty signal handler to SIGINT in the harness process. This handler will interrupt (EINTR) the parent process waitpid() call, letting the code to follow through the normal flow, which will kill all the processes in the child process group. This patch also fixes a typo. Signed-off-by: Breno Leitao Signed-off-by: Gustavo Romero Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/harness.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 66d31de60b9a..9d7166dfad1e 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -85,13 +85,13 @@ wait: return status; } -static void alarm_handler(int signum) +static void sig_handler(int signum) { - /* Jut wake us up from waitpid */ + /* Just wake us up from waitpid */ } -static struct sigaction alarm_action = { - .sa_handler = alarm_handler, +static struct sigaction sig_action = { + .sa_handler = sig_handler, }; void test_harness_set_timeout(uint64_t time) @@ -106,8 +106,14 @@ int test_harness(int (test_function)(void), char *name) test_start(name); test_set_git_version(GIT_VERSION); - if (sigaction(SIGALRM, &alarm_action, NULL)) { - perror("sigaction"); + if (sigaction(SIGINT, &sig_action, NULL)) { + perror("sigaction (sigint)"); + test_error(name); + return 1; + } + + if (sigaction(SIGALRM, &sig_action, NULL)) { + perror("sigaction (sigalrm)"); test_error(name); return 1; } -- cgit From 2a39926c6aa9dda4fd26741f0f834a555f403d32 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 6 Aug 2018 15:09:11 +0000 Subject: powerpc/cpm1: fix compilation error with CONFIG_PPC_EARLY_DEBUG_CPM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e8cb7a55eb8dc ("powerpc: remove superflous inclusions of asm/fixmap.h") removed inclusion of asm/fixmap.h from files not including objects from that file. However, asm/mmu-8xx.h includes call to __fix_to_virt(). The proper way would be to include asm/fixmap.h in asm/mmu-8xx.h but it creates an inclusion loop. So we have to leave asm/fixmap.h in sysdep/cpm_common.c for CONFIG_PPC_EARLY_DEBUG_CPM CC arch/powerpc/sysdev/cpm_common.o In file included from ./arch/powerpc/include/asm/mmu.h:340:0, from ./arch/powerpc/include/asm/reg_8xx.h:8, from ./arch/powerpc/include/asm/reg.h:29, from ./arch/powerpc/include/asm/processor.h:13, from ./arch/powerpc/include/asm/thread_info.h:28, from ./include/linux/thread_info.h:38, from ./arch/powerpc/include/asm/ptrace.h:159, from ./arch/powerpc/include/asm/hw_irq.h:12, from ./arch/powerpc/include/asm/irqflags.h:12, from ./include/linux/irqflags.h:16, from ./include/asm-generic/cmpxchg-local.h:6, from ./arch/powerpc/include/asm/cmpxchg.h:537, from ./arch/powerpc/include/asm/atomic.h:11, from ./include/linux/atomic.h:5, from ./include/linux/mutex.h:18, from ./include/linux/kernfs.h:13, from ./include/linux/sysfs.h:16, from ./include/linux/kobject.h:20, from ./include/linux/device.h:16, from ./include/linux/node.h:18, from ./include/linux/cpu.h:17, from ./include/linux/of_device.h:5, from arch/powerpc/sysdev/cpm_common.c:21: arch/powerpc/sysdev/cpm_common.c: In function ‘udbg_init_cpm’: ./arch/powerpc/include/asm/mmu-8xx.h:218:25: error: implicit declaration of function ‘__fix_to_virt’ [-Werror=implicit-function-declaration] #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) ^ arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro ‘VIRT_IMMR_BASE’ VIRT_IMMR_BASE); ^ ./arch/powerpc/include/asm/mmu-8xx.h:218:39: error: ‘FIX_IMMR_BASE’ undeclared (first use in this function) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) ^ arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro ‘VIRT_IMMR_BASE’ VIRT_IMMR_BASE); ^ ./arch/powerpc/include/asm/mmu-8xx.h:218:39: note: each undeclared identifier is reported only once for each function it appears in #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) ^ arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro ‘VIRT_IMMR_BASE’ VIRT_IMMR_BASE); ^ cc1: all warnings being treated as errors make[1]: *** [arch/powerpc/sysdev/cpm_common.o] Error 1 Fixes: e8cb7a55eb8dc ("powerpc: remove superflous inclusions of asm/fixmap.h") Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/fixmap.h | 1 + arch/powerpc/sysdev/cpm_common.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 40efdf1d2d6e..41cc15c14eee 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -16,6 +16,7 @@ #ifndef __ASSEMBLY__ #include +#include #ifdef CONFIG_HIGHMEM #include #include diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 010975c3422f..b74508175b67 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include -- cgit From 24f36ce6166059783436bdaba3f3b8f23a6d2ddd Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 18 Jul 2018 14:46:49 +0300 Subject: powerpc/configs/dpaa: enable the Cortina PHY driver Cortina PHYs are present on T4240RDB and T2080RDB. Enable the driver by default. Signed-off-by: Camelia Groza Signed-off-by: Scott Wood --- arch/powerpc/configs/dpaa.config | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/configs/dpaa.config b/arch/powerpc/configs/dpaa.config index 2fe76f5e938a..4ffacafe4036 100644 --- a/arch/powerpc/configs/dpaa.config +++ b/arch/powerpc/configs/dpaa.config @@ -2,3 +2,4 @@ CONFIG_FSL_DPAA=y CONFIG_FSL_PAMU=y CONFIG_FSL_FMAN=y CONFIG_FSL_DPAA_ETH=y +CONFIG_CORTINA_PHY=y -- cgit From 39e560a9182e72ad8b0fd2d922f0c516c663cc59 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 18 Jul 2018 14:46:50 +0300 Subject: powerpc/dts/fsl: t4240rdb: use the Cortina PHY driver compatible The Cortina PHY is not compatible with IEEE 802.3 clause 45. Signed-off-by: Camelia Groza [scottwood: made commit message about compatibility, not driver choice] Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t4240rdb.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts index 15eb0a3f7290..a56a705d41f7 100644 --- a/arch/powerpc/boot/dts/fsl/t4240rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts @@ -267,22 +267,22 @@ mdio@fd000 { xfiphy1: ethernet-phy@10 { - compatible = "ethernet-phy-ieee802.3-c45"; + compatible = "ethernet-phy-id13e5.1002"; reg = <0x10>; }; xfiphy2: ethernet-phy@11 { - compatible = "ethernet-phy-ieee802.3-c45"; + compatible = "ethernet-phy-id13e5.1002"; reg = <0x11>; }; xfiphy3: ethernet-phy@13 { - compatible = "ethernet-phy-ieee802.3-c45"; + compatible = "ethernet-phy-id13e5.1002"; reg = <0x13>; }; xfiphy4: ethernet-phy@12 { - compatible = "ethernet-phy-ieee802.3-c45"; + compatible = "ethernet-phy-id13e5.1002"; reg = <0x12>; }; }; -- cgit From bd96461249bdded1f6ee50b25e8cd4ce3faaf581 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Wed, 18 Jul 2018 14:46:51 +0300 Subject: powerpc/dts/fsl: t2080rdb: use the Cortina PHY driver compatible The Cortina PHY is not compatible with IEEE 802.3 clause 45. Signed-off-by: Camelia Groza [scottwood: made commit message about compatibility, not driver choice] Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t2080rdb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t2080rdb.dts b/arch/powerpc/boot/dts/fsl/t2080rdb.dts index 836e4c965b22..55c0210a771d 100644 --- a/arch/powerpc/boot/dts/fsl/t2080rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t2080rdb.dts @@ -97,12 +97,12 @@ mdio@fd000 { xg_cs4315_phy1: ethernet-phy@c { - compatible = "ethernet-phy-ieee802.3-c45"; + compatible = "ethernet-phy-id13e5.1002"; reg = <0xc>; }; xg_cs4315_phy2: ethernet-phy@d { - compatible = "ethernet-phy-ieee802.3-c45"; + compatible = "ethernet-phy-id13e5.1002"; reg = <0xd>; }; -- cgit From d3da701d3308ce1fa457f32c6c9e2102aedea030 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 3 Aug 2018 16:06:00 +1000 Subject: powerpc/powernv: Allow memory that has been hot-removed to be hot-added This patch allows the memory removed by memtrace to be readded to the kernel. So now you don't have to reboot your system to add the memory back to the kernel or to have a different amount of memory removed. Signed-off-by: Rashmica Gupta Tested-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/memtrace.c | 92 ++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index f73101119eba..51dc398ae3f7 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -177,8 +177,11 @@ static int memtrace_init_debugfs(void) snprintf(ent->name, 16, "%08x", ent->nid); dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); - if (!dir) + if (!dir) { + pr_err("Failed to create debugfs directory for node %d\n", + ent->nid); return -1; + } ent->dir = dir; debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); @@ -189,18 +192,93 @@ static int memtrace_init_debugfs(void) return ret; } +static int online_mem_block(struct memory_block *mem, void *arg) +{ + return device_online(&mem->dev); +} + +/* + * Iterate through the chunks of memory we have removed from the kernel + * and attempt to add them back to the kernel. + */ +static int memtrace_online(void) +{ + int i, ret = 0; + struct memtrace_entry *ent; + + for (i = memtrace_array_nr - 1; i >= 0; i--) { + ent = &memtrace_array[i]; + + /* We have onlined this chunk previously */ + if (ent->nid == -1) + continue; + + /* Remove from io mappings */ + if (ent->mem) { + iounmap(ent->mem); + ent->mem = 0; + } + + if (add_memory(ent->nid, ent->start, ent->size)) { + pr_err("Failed to add trace memory to node %d\n", + ent->nid); + ret += 1; + continue; + } + + /* + * If kernel isn't compiled with the auto online option + * we need to online the memory ourselves. + */ + if (!memhp_auto_online) { + walk_memory_range(PFN_DOWN(ent->start), + PFN_UP(ent->start + ent->size - 1), + NULL, online_mem_block); + } + + /* + * Memory was added successfully so clean up references to it + * so on reentry we can tell that this chunk was added. + */ + debugfs_remove_recursive(ent->dir); + pr_info("Added trace memory back to node %d\n", ent->nid); + ent->size = ent->start = ent->nid = -1; + } + if (ret) + return ret; + + /* If all chunks of memory were added successfully, reset globals */ + kfree(memtrace_array); + memtrace_array = NULL; + memtrace_size = 0; + memtrace_array_nr = 0; + return 0; +} + static int memtrace_enable_set(void *data, u64 val) { - if (memtrace_size) + u64 bytes; + + /* + * Don't attempt to do anything if size isn't aligned to a memory + * block or equal to zero. + */ + bytes = memory_block_size_bytes(); + if (val & (bytes - 1)) { + pr_err("Value must be aligned with 0x%llx\n", bytes); return -EINVAL; + } - if (!val) - return -EINVAL; + /* Re-add/online previously removed/offlined memory */ + if (memtrace_size) { + if (memtrace_online()) + return -EAGAIN; + } - /* Make sure size is aligned to a memory block */ - if (val & (memory_block_size_bytes() - 1)) - return -EINVAL; + if (!val) + return 0; + /* Offline and remove memory */ if (memtrace_init_regions_runtime(val)) return -EINVAL; -- cgit From 24576a70e7dfa703617a5e7ae8e6cbd66cfb6e5d Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Fri, 3 Aug 2018 16:06:01 +1000 Subject: Documentation: Update documentation on ppc-memtrace Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/ppc-memtrace | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/ppc-memtrace b/Documentation/ABI/testing/ppc-memtrace index 2e8b93741270..9606aed33137 100644 --- a/Documentation/ABI/testing/ppc-memtrace +++ b/Documentation/ABI/testing/ppc-memtrace @@ -13,10 +13,11 @@ Contact: linuxppc-dev@lists.ozlabs.org Description: Write an integer containing the size in bytes of the memory you want removed from each NUMA node to this file - it must be aligned to the memblock size. This amount of RAM will be removed - from the kernel mappings and the following debugfs files will be - created. This can only be successfully done once per boot. Once - memory is successfully removed from each node, the following - files are created. + from each NUMA node in the kernel mappings and the following + debugfs files will be created. Once memory is successfully + removed from each node, the following files are created. To + re-add memory to the kernel, echo 0 into this file (it will be + automatically onlined). What: /sys/kernel/debug/powerpc/memtrace/ Date: Aug 2017 -- cgit From c42d3be0c06f0c1c416054022aa535c08a1f9b39 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Aug 2018 14:57:24 +0300 Subject: powerpc: Fix size calculation using resource_size() The problem is the the calculation should be "end - start + 1" but the plus one is missing in this calculation. Fixes: 8626816e905e ("powerpc: add support for MPIC message register API") Signed-off-by: Dan Carpenter Reviewed-by: Tyrel Datwyler Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/mpic_msgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index eb69a5186243..280e964e1aa8 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -196,7 +196,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. */ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, rsrc.end - rsrc.start); + msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; -- cgit From 6bd6d8672208e8dc0c18588d6eb4588a195fd788 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 6 Aug 2018 15:09:11 +0000 Subject: powerpc/cpm1: fix compilation error with CONFIG_PPC_EARLY_DEBUG_CPM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e8cb7a55eb8dc ("powerpc: remove superflous inclusions of asm/fixmap.h") removed inclusion of asm/fixmap.h from files not including objects from that file. However, asm/mmu-8xx.h includes call to __fix_to_virt(). The proper way would be to include asm/fixmap.h in asm/mmu-8xx.h but it creates an inclusion loop. So we have to leave asm/fixmap.h in sysdep/cpm_common.c for CONFIG_PPC_EARLY_DEBUG_CPM CC arch/powerpc/sysdev/cpm_common.o In file included from ./arch/powerpc/include/asm/mmu.h:340:0, from ./arch/powerpc/include/asm/reg_8xx.h:8, from ./arch/powerpc/include/asm/reg.h:29, from ./arch/powerpc/include/asm/processor.h:13, from ./arch/powerpc/include/asm/thread_info.h:28, from ./include/linux/thread_info.h:38, from ./arch/powerpc/include/asm/ptrace.h:159, from ./arch/powerpc/include/asm/hw_irq.h:12, from ./arch/powerpc/include/asm/irqflags.h:12, from ./include/linux/irqflags.h:16, from ./include/asm-generic/cmpxchg-local.h:6, from ./arch/powerpc/include/asm/cmpxchg.h:537, from ./arch/powerpc/include/asm/atomic.h:11, from ./include/linux/atomic.h:5, from ./include/linux/mutex.h:18, from ./include/linux/kernfs.h:13, from ./include/linux/sysfs.h:16, from ./include/linux/kobject.h:20, from ./include/linux/device.h:16, from ./include/linux/node.h:18, from ./include/linux/cpu.h:17, from ./include/linux/of_device.h:5, from arch/powerpc/sysdev/cpm_common.c:21: arch/powerpc/sysdev/cpm_common.c: In function ‘udbg_init_cpm’: ./arch/powerpc/include/asm/mmu-8xx.h:218:25: error: implicit declaration of function ‘__fix_to_virt’ [-Werror=implicit-function-declaration] #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) ^ arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro ‘VIRT_IMMR_BASE’ VIRT_IMMR_BASE); ^ ./arch/powerpc/include/asm/mmu-8xx.h:218:39: error: ‘FIX_IMMR_BASE’ undeclared (first use in this function) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) ^ arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro ‘VIRT_IMMR_BASE’ VIRT_IMMR_BASE); ^ ./arch/powerpc/include/asm/mmu-8xx.h:218:39: note: each undeclared identifier is reported only once for each function it appears in #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) ^ arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro ‘VIRT_IMMR_BASE’ VIRT_IMMR_BASE); ^ cc1: all warnings being treated as errors make[1]: *** [arch/powerpc/sysdev/cpm_common.o] Error 1 Fixes: e8cb7a55eb8dc ("powerpc: remove superflous inclusions of asm/fixmap.h") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/fixmap.h | 1 + arch/powerpc/sysdev/cpm_common.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 40efdf1d2d6e..41cc15c14eee 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -16,6 +16,7 @@ #ifndef __ASSEMBLY__ #include +#include #ifdef CONFIG_HIGHMEM #include #include diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 010975c3422f..b74508175b67 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include -- cgit From 1bd6a1c4b80a28d975287630644e6b47d0f977a5 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 7 Aug 2018 02:12:45 +0530 Subject: powerpc/fadump: handle crash memory ranges array index overflow Crash memory ranges is an array of memory ranges of the crashing kernel to be exported as a dump via /proc/vmcore file. The size of the array is set based on INIT_MEMBLOCK_REGIONS, which works alright in most cases where memblock memory regions count is less than INIT_MEMBLOCK_REGIONS value. But this count can grow beyond INIT_MEMBLOCK_REGIONS value since commit 142b45a72e22 ("memblock: Add array resizing support"). On large memory systems with a few DLPAR operations, the memblock memory regions count could be larger than INIT_MEMBLOCK_REGIONS value. On such systems, registering fadump results in crash or other system failures like below: task: c00007f39a290010 ti: c00000000b738000 task.ti: c00000000b738000 NIP: c000000000047df4 LR: c0000000000f9e58 CTR: c00000000010f180 REGS: c00000000b73b570 TRAP: 0300 Tainted: G L X (4.4.140+) MSR: 8000000000009033 CR: 22004484 XER: 20000000 CFAR: c000000000008500 DAR: 000007a450000000 DSISR: 40000000 SOFTE: 0 ... NIP [c000000000047df4] smp_send_reschedule+0x24/0x80 LR [c0000000000f9e58] resched_curr+0x138/0x160 Call Trace: resched_curr+0x138/0x160 (unreliable) check_preempt_curr+0xc8/0xf0 ttwu_do_wakeup+0x38/0x150 try_to_wake_up+0x224/0x4d0 __wake_up_common+0x94/0x100 ep_poll_callback+0xac/0x1c0 __wake_up_common+0x94/0x100 __wake_up_sync_key+0x70/0xa0 sock_def_readable+0x58/0xa0 unix_stream_sendmsg+0x2dc/0x4c0 sock_sendmsg+0x68/0xa0 ___sys_sendmsg+0x2cc/0x2e0 __sys_sendmsg+0x5c/0xc0 SyS_socketcall+0x36c/0x3f0 system_call+0x3c/0x100 as array index overflow is not checked for while setting up crash memory ranges causing memory corruption. To resolve this issue, dynamically allocate memory for crash memory ranges and resize it incrementally, in units of pagesize, on hitting array size limit. Fixes: 2df173d9e85d ("fadump: Initialize elfcore header and add PT_LOAD program headers.") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Hari Bathini Reviewed-by: Mahesh Salgaonkar [mpe: Just use PAGE_SIZE directly, fixup variable placement] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/fadump.h | 3 -- arch/powerpc/kernel/fadump.c | 91 +++++++++++++++++++++++++++++++++------ 2 files changed, 77 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 5a23010af600..1e7a33592e29 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -195,9 +195,6 @@ struct fadump_crash_info_header { struct cpumask online_mask; }; -/* Crash memory ranges */ -#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2) - struct fad_crash_memory_ranges { unsigned long long base; unsigned long long size; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 07e8396d472b..958eb5cd2a9e 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -47,8 +47,10 @@ static struct fadump_mem_struct fdm; static const struct fadump_mem_struct *fdm_active; static DEFINE_MUTEX(fadump_mutex); -struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES]; +struct fad_crash_memory_ranges *crash_memory_ranges; +int crash_memory_ranges_size; int crash_mem_ranges; +int max_crash_mem_ranges; /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, @@ -868,38 +870,88 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active) return 0; } -static inline void fadump_add_crash_memory(unsigned long long base, - unsigned long long end) +static void free_crash_memory_ranges(void) +{ + kfree(crash_memory_ranges); + crash_memory_ranges = NULL; + crash_memory_ranges_size = 0; + max_crash_mem_ranges = 0; +} + +/* + * Allocate or reallocate crash memory ranges array in incremental units + * of PAGE_SIZE. + */ +static int allocate_crash_memory_ranges(void) +{ + struct fad_crash_memory_ranges *new_array; + u64 new_size; + + new_size = crash_memory_ranges_size + PAGE_SIZE; + pr_debug("Allocating %llu bytes of memory for crash memory ranges\n", + new_size); + + new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL); + if (new_array == NULL) { + pr_err("Insufficient memory for setting up crash memory ranges\n"); + free_crash_memory_ranges(); + return -ENOMEM; + } + + crash_memory_ranges = new_array; + crash_memory_ranges_size = new_size; + max_crash_mem_ranges = (new_size / + sizeof(struct fad_crash_memory_ranges)); + return 0; +} + +static inline int fadump_add_crash_memory(unsigned long long base, + unsigned long long end) { if (base == end) - return; + return 0; + + if (crash_mem_ranges == max_crash_mem_ranges) { + int ret; + + ret = allocate_crash_memory_ranges(); + if (ret) + return ret; + } pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", crash_mem_ranges, base, end - 1, (end - base)); crash_memory_ranges[crash_mem_ranges].base = base; crash_memory_ranges[crash_mem_ranges].size = end - base; crash_mem_ranges++; + return 0; } -static void fadump_exclude_reserved_area(unsigned long long start, +static int fadump_exclude_reserved_area(unsigned long long start, unsigned long long end) { unsigned long long ra_start, ra_end; + int ret = 0; ra_start = fw_dump.reserve_dump_area_start; ra_end = ra_start + fw_dump.reserve_dump_area_size; if ((ra_start < end) && (ra_end > start)) { if ((start < ra_start) && (end > ra_end)) { - fadump_add_crash_memory(start, ra_start); - fadump_add_crash_memory(ra_end, end); + ret = fadump_add_crash_memory(start, ra_start); + if (ret) + return ret; + + ret = fadump_add_crash_memory(ra_end, end); } else if (start < ra_start) { - fadump_add_crash_memory(start, ra_start); + ret = fadump_add_crash_memory(start, ra_start); } else if (ra_end < end) { - fadump_add_crash_memory(ra_end, end); + ret = fadump_add_crash_memory(ra_end, end); } } else - fadump_add_crash_memory(start, end); + ret = fadump_add_crash_memory(start, end); + + return ret; } static int fadump_init_elfcore_header(char *bufp) @@ -939,10 +991,11 @@ static int fadump_init_elfcore_header(char *bufp) * Traverse through memblock structure and setup crash memory ranges. These * ranges will be used create PT_LOAD program headers in elfcore header. */ -static void fadump_setup_crash_memory_ranges(void) +static int fadump_setup_crash_memory_ranges(void) { struct memblock_region *reg; unsigned long long start, end; + int ret; pr_debug("Setup crash memory ranges.\n"); crash_mem_ranges = 0; @@ -953,7 +1006,9 @@ static void fadump_setup_crash_memory_ranges(void) * specified during fadump registration. We need to create a separate * program header for this chunk with the correct offset. */ - fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); + if (ret) + return ret; for_each_memblock(memory, reg) { start = (unsigned long long)reg->base; @@ -973,8 +1028,12 @@ static void fadump_setup_crash_memory_ranges(void) } /* add this range excluding the reserved dump area. */ - fadump_exclude_reserved_area(start, end); + ret = fadump_exclude_reserved_area(start, end); + if (ret) + return ret; } + + return 0; } /* @@ -1097,6 +1156,7 @@ static int register_fadump(void) { unsigned long addr; void *vaddr; + int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1105,7 +1165,9 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; - fadump_setup_crash_memory_ranges(); + ret = fadump_setup_crash_memory_ranges(); + if (ret) + return ret; addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len); /* Initialize fadump crash info header. */ @@ -1183,6 +1245,7 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fadump_unregister_dump(&fdm); + free_crash_memory_ranges(); } } -- cgit From ced1bf52f47783135b985d2aacf53fa77fd72e2e Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 7 Aug 2018 02:12:54 +0530 Subject: powerpc/fadump: merge adjacent memory ranges to reduce PT_LOAD segements With dynamic memory allocation support for crash memory ranges array, there is no hard limit on the no. of crash memory ranges kernel could export, but program headers count could overflow in the /proc/vmcore ELF file while exporting each memory range as PT_LOAD segment. Reduce the likelihood of a such scenario, by folding adjacent crash memory ranges which minimizes the total number of PT_LOAD segments. Signed-off-by: Hari Bathini Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/fadump.c | 45 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 958eb5cd2a9e..986ec476fd5d 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -908,22 +908,41 @@ static int allocate_crash_memory_ranges(void) static inline int fadump_add_crash_memory(unsigned long long base, unsigned long long end) { + u64 start, size; + bool is_adjacent = false; + if (base == end) return 0; - if (crash_mem_ranges == max_crash_mem_ranges) { - int ret; + /* + * Fold adjacent memory ranges to bring down the memory ranges/ + * PT_LOAD segments count. + */ + if (crash_mem_ranges) { + start = crash_memory_ranges[crash_mem_ranges - 1].base; + size = crash_memory_ranges[crash_mem_ranges - 1].size; - ret = allocate_crash_memory_ranges(); - if (ret) - return ret; + if ((start + size) == base) + is_adjacent = true; + } + if (!is_adjacent) { + /* resize the array on reaching the limit */ + if (crash_mem_ranges == max_crash_mem_ranges) { + int ret; + + ret = allocate_crash_memory_ranges(); + if (ret) + return ret; + } + + start = base; + crash_memory_ranges[crash_mem_ranges].base = start; + crash_mem_ranges++; } + crash_memory_ranges[crash_mem_ranges - 1].size = (end - start); pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", - crash_mem_ranges, base, end - 1, (end - base)); - crash_memory_ranges[crash_mem_ranges].base = base; - crash_memory_ranges[crash_mem_ranges].size = end - base; - crash_mem_ranges++; + (crash_mem_ranges - 1), start, end - 1, (end - start)); return 0; } @@ -999,6 +1018,14 @@ static int fadump_setup_crash_memory_ranges(void) pr_debug("Setup crash memory ranges.\n"); crash_mem_ranges = 0; + + /* allocate memory for crash memory ranges for the first time */ + if (!max_crash_mem_ranges) { + ret = allocate_crash_memory_ranges(); + if (ret) + return ret; + } + /* * add the first memory chunk (RMA_START through boot_memory_size) as * a separate memory chunk. The reason is, at the time crash firmware -- cgit From cd813e1cd7122f2c261dce5b54d1e0c97f80e1a5 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 7 Aug 2018 19:46:46 +0530 Subject: powerpc/pseries: Fix endianness while restoring of r3 in MCE handler. During Machine Check interrupt on pseries platform, register r3 points RTAS extended event log passed by hypervisor. Since hypervisor uses r3 to pass pointer to rtas log, it stores the original r3 value at the start of the memory (first 8 bytes) pointed by r3. Since hypervisor stores this info and rtas log is in BE format, linux should make sure to restore r3 value in correct endian format. Without this patch when MCE handler, after recovery, returns to code that that caused the MCE may end up with Data SLB access interrupt for invalid address followed by kernel panic or hang. Severe Machine check interrupt [Recovered] NIP [d00000000ca301b8]: init_module+0x1b8/0x338 [bork_kernel] Initiator: CPU Error type: SLB [Multihit] Effective address: d00000000ca70000 cpu 0xa: Vector: 380 (Data SLB Access) at [c0000000fc7775b0] pc: c0000000009694c0: vsnprintf+0x80/0x480 lr: c0000000009698e0: vscnprintf+0x20/0x60 sp: c0000000fc777830 msr: 8000000002009033 dar: a803a30c000000d0 current = 0xc00000000bc9ef00 paca = 0xc00000001eca5c00 softe: 3 irq_happened: 0x01 pid = 8860, comm = insmod vscnprintf+0x20/0x60 vprintk_emit+0xb4/0x4b0 vprintk_func+0x5c/0xd0 printk+0x38/0x4c init_module+0x1c0/0x338 [bork_kernel] do_one_initcall+0x54/0x230 do_init_module+0x8c/0x248 load_module+0x12b8/0x15b0 sys_finit_module+0xa8/0x110 system_call+0x58/0x6c --- Exception: c00 (System Call) at 00007fff8bda0644 SP (7fffdfbfe980) is in userspace This patch fixes this issue. Fixes: a08a53ea4c97 ("powerpc/le: Enable RTAS events support") Cc: stable@vger.kernel.org # v3.15+ Reviewed-by: Nicholas Piggin Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/ras.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 14a46b07ab2f..851ce326874a 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -367,7 +367,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) } savep = __va(regs->gpr[3]); - regs->gpr[3] = savep[0]; /* restore original r3 */ + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ h = (struct rtas_error_log *)&savep[1]; /* Use the per cpu buffer from paca to store rtas error log */ -- cgit From fa54a981ea7a852c145b05c95abba11e81fc1157 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 9 Aug 2018 08:14:41 +0000 Subject: powerpc/lib: Use patch_site to patch copy_32 functions once cache is enabled The symbol memcpy_nocache_branch defined in order to allow patching of memset function once cache is enabled leads to confusing reports by perf tool. Using the new patch_site functionality solves this issue. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 1 + arch/powerpc/kernel/setup_32.c | 7 +++---- arch/powerpc/lib/copy_32.S | 9 ++++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 70fdc5b9b9fb..1f4691ce4126 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -146,6 +146,7 @@ void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); /* Patch sites */ extern s32 patch__call_flush_count_cache; extern s32 patch__flush_count_cache_return; +extern s32 patch__memset_nocache, patch__memcpy_nocache; extern long flush_count_cache; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 0e3743343280..ba969278bf4d 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -97,11 +97,10 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * We do the initial parsing of the flat device-tree and prepares * for the MMU to be fully initialized. */ -extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ - notrace void __init machine_init(u64 dt_ptr) { - unsigned int *addr = &memset_nocache_branch; + unsigned int *addr = (unsigned int *)((unsigned long)&patch__memset_nocache + + patch__memset_nocache); unsigned long insn; /* Configure static keys first, now that we're relocated. */ @@ -110,7 +109,7 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP); + patch_instruction_site(&patch__memcpy_nocache, PPC_INST_NOP); insn = create_cond_branch(addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index da425bb6b369..ba66846fe973 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -13,6 +13,7 @@ #include #include #include +#include #define COPY_16_BYTES \ lwz r7,4(r4); \ @@ -107,8 +108,8 @@ _GLOBAL(memset) * Skip optimised bloc until cache is enabled. Will be replaced * by 'bne' during boot to use normal procedure if r4 is not zero */ -_GLOBAL(memset_nocache_branch) - b 2f +5: b 2f + patch_site 5b, patch__memset_nocache clrlwi r7,r6,32-LG_CACHELINE_BYTES add r8,r7,r5 @@ -168,7 +169,9 @@ _GLOBAL(memmove) /* fall through */ _GLOBAL(memcpy) - b generic_memcpy +1: b generic_memcpy + patch_site 1b, patch__memcpy_nocache + add r7,r3,r5 /* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 -- cgit From 646dbe40fa2a54118975792fa9b98cefac7d232e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 8 Aug 2018 15:36:34 +0000 Subject: powerpc/mm: remove huge_pte_offset_and_shift() prototype huge_pte_offset_and_shift() has never existed Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hugetlb.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 3225eb6402cc..2d00cc530083 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -84,9 +84,6 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, return dir + idx; } -pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, - unsigned long addr, unsigned *shift); - void flush_dcache_icache_hugepage(struct page *page); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, -- cgit From 302c7b0c4ff5aed585419603f835dee30207e5d5 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 22 Nov 2016 17:20:09 +0800 Subject: powerpc/xmon: Add address lookup for percpu symbols Currently, in xmon, there is no obvious way to get an address for a percpu symbol for a particular cpu. Having such an ability would be good for debugging the system when percpu variables got involved. Therefore, this patch introduces a new xmon command "lp" to lookup the address for percpu symbols. Usage of "lp" is similar to "ls", except that we could add a cpu number to choose the variable of which cpu we want to lookup. If no cpu number is given, lookup for current cpu. Signed-off-by: Boqun Feng Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index b7dd683b0470..7c4aa5b00302 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -56,6 +56,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include @@ -244,6 +245,7 @@ Commands:\n\ f flush cache\n\ la lookup symbol+offset of specified address\n\ ls lookup address of specified symbol\n\ + lp s [#] lookup address of percpu symbol s for current cpu, or cpu #\n\ m examine/change memory\n\ mm move a block of memory\n\ ms set a block of memory\n\ @@ -3352,7 +3354,8 @@ static void symbol_lookup(void) { int type = inchar(); - unsigned long addr; + unsigned long addr, cpu; + void __percpu *ptr = NULL; static char tmp[64]; switch (type) { @@ -3373,6 +3376,34 @@ symbol_lookup(void) printf("Symbol '%s' not found.\n", tmp); sync(); } + catch_memory_errors = 0; + termch = 0; + break; + case 'p': + getstring(tmp, 64); + if (setjmp(bus_error_jmp) == 0) { + catch_memory_errors = 1; + sync(); + ptr = (void __percpu *)kallsyms_lookup_name(tmp); + sync(); + } + + if (ptr && + ptr >= (void __percpu *)__per_cpu_start && + ptr < (void __percpu *)__per_cpu_end) + { + if (scanhex(&cpu) && cpu < num_possible_cpus()) { + addr = (unsigned long)per_cpu_ptr(ptr, cpu); + } else { + cpu = raw_smp_processor_id(); + addr = (unsigned long)this_cpu_ptr(ptr); + } + + printf("%s for cpu 0x%lx: %lx\n", tmp, cpu, addr); + } else { + printf("Percpu symbol '%s' not found.\n", tmp); + } + catch_memory_errors = 0; termch = 0; break; -- cgit From baedcdf5054c151a33e34392af7d8c3a244f16e6 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 11 Mar 2018 09:03:42 +0100 Subject: powerpc/kexec: Use common error handling code in setup_new_fdt() Add a jump target so that a bit of exception handling can be better reused at the end of this function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Reviewed-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/machine_kexec_file_64.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index 0bd23dc789a4..c77e95e9b384 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -269,18 +269,14 @@ int setup_new_fdt(const struct kimage *image, void *fdt, ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-start", initrd_load_addr); - if (ret < 0) { - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; - } + if (ret < 0) + goto err; /* initrd-end is the first address after the initrd image. */ ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", initrd_load_addr + initrd_len); - if (ret < 0) { - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; - } + if (ret < 0) + goto err; ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); if (ret) { @@ -292,10 +288,8 @@ int setup_new_fdt(const struct kimage *image, void *fdt, if (cmdline != NULL) { ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); - if (ret < 0) { - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; - } + if (ret < 0) + goto err; } else { ret = fdt_delprop(fdt, chosen_node, "bootargs"); if (ret && ret != -FDT_ERR_NOTFOUND) { @@ -311,10 +305,12 @@ int setup_new_fdt(const struct kimage *image, void *fdt, } ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); - if (ret) { - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; - } + if (ret) + goto err; return 0; + +err: + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; } -- cgit From 2fff0f07b8441a481eb5fc0bfcd0e46d6dbd90db Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 21:20:02 +0100 Subject: powerpc/powermac: Add missing include of header pmac.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The header `pmac.h` was not included, leading to the following warnings, treated as error with W=1: arch/powerpc/platforms/powermac/time.c:69:13: error: no previous prototype for ‘pmac_time_init’ [-Werror=missing-prototypes] arch/powerpc/platforms/powermac/time.c:207:15: error: no previous prototype for ‘pmac_get_boot_time’ [-Werror=missing-prototypes] arch/powerpc/platforms/powermac/time.c:222:6: error: no previous prototype for ‘pmac_get_rtc_time’ [-Werror=missing-prototypes] arch/powerpc/platforms/powermac/time.c:240:5: error: no previous prototype for ‘pmac_set_rtc_time’ [-Werror=missing-prototypes] arch/powerpc/platforms/powermac/time.c:259:12: error: no previous prototype for ‘via_calibrate_decr’ [-Werror=missing-prototypes] arch/powerpc/platforms/powermac/time.c:311:13: error: no previous prototype for ‘pmac_calibrate_decr’ [-Werror=missing-prototypes] The function `via_calibrate_decr` was made static to silence a warning. Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powermac/time.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 12e6e4d30602..f92c1918fb56 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -34,6 +34,8 @@ #include #include +#include "pmac.h" + #undef DEBUG #ifdef DEBUG @@ -249,7 +251,7 @@ int pmac_set_rtc_time(struct rtc_time *tm) * Calibrate the decrementer register using VIA timer 1. * This is used both on powermacs and CHRP machines. */ -int __init via_calibrate_decr(void) +static int __init via_calibrate_decr(void) { struct device_node *vias; volatile unsigned char __iomem *via; -- cgit From e4ecafb14fd9cd77d8f4320af1922e700db56d2b Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 22:05:28 +0100 Subject: cxl: remove a dead branch In commit 14baf4d9c739 ("cxl: Add guest-specific code") the following code was added: if (afu->crs_len < 0) { dev_err(&afu->dev, "Unexpected configuration record size value\n"); return -EINVAL; } However the variable `crs_len` is of type u64 and cannot be compared < 0. Remove the dead code section. Fix the following warning treated as error with W=1: ../drivers/misc/cxl/guest.c:919:19: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- drivers/misc/cxl/guest.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index f5dc740fcd13..3bc0c15d4d85 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -913,11 +913,6 @@ static int afu_properties_look_ok(struct cxl_afu *afu) return -EINVAL; } - if (afu->crs_len < 0) { - dev_err(&afu->dev, "Unexpected configuration record size value\n"); - return -EINVAL; - } - return 0; } -- cgit From 8921305c1ec9d9ea23e5f8eac3063021e836530a Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 28 Mar 2018 21:30:28 +0200 Subject: powerpc/powermac: Remove variable x that's never read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the value of x is never intended to be read, remove it. Fix warning treated as error with W=1: arch/powerpc/platforms/powermac/udbg_scc.c:76:9: error: variable ‘x’ set but not used Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powermac/udbg_scc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index d83135a9830e..8901973ed683 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -73,7 +73,7 @@ void udbg_scc_init(int force_scc) struct device_node *stdout = NULL, *escc = NULL, *macio = NULL; struct device_node *ch, *ch_def = NULL, *ch_a = NULL; const char *path; - int i, x; + int i; escc = of_find_node_by_name(NULL, "escc"); if (escc == NULL) @@ -120,7 +120,7 @@ void udbg_scc_init(int force_scc) mb(); for (i = 20000; i != 0; --i) - x = in_8(sccc); + in_8(sccc); out_8(sccc, 0x09); /* reset A or B side */ out_8(sccc, 0xc0); -- cgit From 618a89d738fdd97f6fc58b7b666c7a84b2bb95e3 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 28 Mar 2018 21:39:35 +0200 Subject: powerpc/powermac: Make some functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions can all be static, make it so. Fix warnings treated as errors with W=1: arch/powerpc/platforms/powermac/pci.c:1022:6: error: no previous prototype for ‘pmac_pci_fixup_ohci’ arch/powerpc/platforms/powermac/pci.c:1057:6: error: no previous prototype for ‘pmac_pci_fixup_cardbus’ arch/powerpc/platforms/powermac/pci.c:1094:6: error: no previous prototype for ‘pmac_pci_fixup_pciata’ Remove has_address declaration and assignment since it's not used. Also add gcc attribute unused to fix a warning treated as error with W=1: arch/powerpc/platforms/powermac/pci.c:784:19: error: variable ‘has_address’ set but not used arch/powerpc/platforms/powermac/pci.c:907:22: error: variable ‘ht’ set but not used Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powermac/pci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index df762bb3c735..04527d13d5a4 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -781,12 +781,12 @@ static int __init pmac_add_bridge(struct device_node *dev) struct resource rsrc; char *disp_name; const int *bus_range; - int primary = 1, has_address = 0; + int primary = 1; DBG("Adding PCI host bridge %pOF\n", dev); /* Fetch host bridge registers address */ - has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); + of_address_to_resource(dev, 0, &rsrc); /* Get bus range if any */ bus_range = of_get_property(dev, "bus-range", &len); @@ -904,7 +904,7 @@ static int pmac_pci_root_bridge_prepare(struct pci_host_bridge *bridge) void __init pmac_pci_init(void) { struct device_node *np, *root; - struct device_node *ht = NULL; + struct device_node *ht __maybe_unused = NULL; pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN); @@ -1019,7 +1019,7 @@ static bool pmac_pci_enable_device_hook(struct pci_dev *dev) return true; } -void pmac_pci_fixup_ohci(struct pci_dev *dev) +static void pmac_pci_fixup_ohci(struct pci_dev *dev) { struct device_node *node = pci_device_to_OF_node(dev); @@ -1054,7 +1054,7 @@ void __init pmac_pcibios_after_init(void) } } -void pmac_pci_fixup_cardbus(struct pci_dev* dev) +static void pmac_pci_fixup_cardbus(struct pci_dev *dev) { if (!machine_is(powermac)) return; @@ -1091,7 +1091,7 @@ void pmac_pci_fixup_cardbus(struct pci_dev* dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus); -void pmac_pci_fixup_pciata(struct pci_dev* dev) +static void pmac_pci_fixup_pciata(struct pci_dev *dev) { u8 progif = 0; -- cgit From eab00a208eb63d863b6cc0a03f68b4424ef5add8 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 4 Apr 2018 22:08:35 +0200 Subject: powerpc: Move `path` variable inside DEBUG_PROM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add gcc attribute unused for two variables. Fix warnings treated as errors with W=1: arch/powerpc/kernel/prom_init.c:1388:8: error: variable ‘path’ set but not used [-Werror=unused-but-set-variable] Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0433bf24a10d..9b38a2e5dd35 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -628,7 +628,7 @@ static void __init early_cmdline_parse(void) const char *opt; char *p; - int l = 0; + int l __maybe_unused = 0; prom_cmd_line[0] = 0; p = prom_cmd_line; @@ -1421,7 +1421,10 @@ static void __init reserve_mem(u64 base, u64 size) static void __init prom_init_mem(void) { phandle node; - char *path, type[64]; +#ifdef DEBUG_PROM + char *path; +#endif + char type[64]; unsigned int plen; cell_t *p, *endp; __be32 val; @@ -1442,7 +1445,9 @@ static void __init prom_init_mem(void) prom_debug("root_size_cells: %x\n", rsc); prom_debug("scanning memory:\n"); +#ifdef DEBUG_PROM path = prom_scratch; +#endif for (node = 0; prom_next_node(&node); ) { type[0] = 0; -- cgit From f2c6d0d1092e3da2bd36c768777e883fa3908548 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 22 Jun 2018 21:26:53 +0200 Subject: powerpc/32: Include setup.h header file to fix warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to include setup.h to provide the following prototypes: - irqstack_early_init - setup_power_save - initialize_cache_info Fix the following warnings (treated as error in W=1): arch/powerpc/kernel/setup_32.c:198:13: error: no previous prototype for ‘irqstack_early_init’ arch/powerpc/kernel/setup_32.c:238:13: error: no previous prototype for ‘setup_power_save’ arch/powerpc/kernel/setup_32.c:253:13: error: no previous prototype for ‘initialize_cache_info’ Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ba969278bf4d..8c507be12c3c 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -43,6 +43,8 @@ #include #include +#include "setup.h" + #define DBG(fmt...) extern void bootx_init(unsigned long r4, unsigned long phys); -- cgit From fce278af81daf8599f9e94883cbe43068b6c4785 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 22 Jun 2018 21:27:47 +0200 Subject: powerpc/mm: remove warning about ‘type’ being set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ‘type’ is only used when CONFIG_DEBUG_HIGHMEM is set. So add a possibly unused tag to variable. Remove warning treated as error with W=1: arch/powerpc/mm/highmem.c:59:6: error: variable ‘type’ set but not used [-Werror=unused-but-set-variable] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/mm/highmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index 668e87d03f9e..82a0e37557a5 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(kmap_atomic_prot); void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type; + int type __maybe_unused; if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); -- cgit From 0b6aa1a20add96437c46db77c9bae2d7529dfbc1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Aug 2018 19:06:59 +0530 Subject: powerpc/mm/tlbflush: update the mmu_gather page size while iterating address range This patch makes sure we update the mmu_gather page size even if we are requesting for a fullmm flush. This avoids triggering VM_WARN_ON in code paths like __tlb_remove_page_size that explicitly check for removing range page size to be same as mmu gather page size. Fixes: 5a6099346c41 ("powerpc/64s/radix: tlb do not flush on page size when fullmm") Signed-off-by: Aneesh Kumar K.V Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/tlb.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 97ecef697e1b..f0e571b2dc7c 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -49,13 +49,11 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, unsigned int page_size) { - if (tlb->fullmm) - return; - if (!tlb->page_size) tlb->page_size = page_size; else if (tlb->page_size != page_size) { - tlb_flush_mmu(tlb); + if (!tlb->fullmm) + tlb_flush_mmu(tlb); /* * update the page size after flush for the new * mmu_gather. -- cgit From ae24ce5e12127eeef6bf946c3ee0e95f3797caaf Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Aug 2018 19:07:20 +0530 Subject: powerpc/powernv/idle: Fix build error Fix the below build error using strlcpy instead of strncpy In function 'pnv_parse_cpuidle_dt', inlined from 'pnv_init_idle_states' at arch/powerpc/platforms/powernv/idle.c:840:7, inlined from '__machine_initcall_powernv_pnv_init_idle_states' at arch/powerpc/platforms/powernv/idle.c:870:1: arch/powerpc/platforms/powernv/idle.c:820:3: error: 'strncpy' specified bound 16 equals destination size [-Werror=stringop-truncation] strncpy(pnv_idle_states[i].name, temp_string[i], ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PNV_IDLE_NAME_LEN); Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index ecb002c5db83..35f699ebb662 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -817,7 +817,7 @@ static int pnv_parse_cpuidle_dt(void) goto out; } for (i = 0; i < nr_idle_states; i++) - strncpy(pnv_idle_states[i].name, temp_string[i], + strlcpy(pnv_idle_states[i].name, temp_string[i], PNV_IDLE_NAME_LEN); nr_pnv_idle_states = nr_idle_states; rc = 0; -- cgit From e7e81847478b37a3958a3163171bf6aee99f87d7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 10 Aug 2018 16:42:48 +1000 Subject: powerpc/64s: move machine check SLB flushing to mm/slb.c The machine check code that flushes and restores bolted segments in real mode belongs in mm/slb.c. This will also be used by pseries machine check and idle code in future changes. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 3 +++ arch/powerpc/kernel/mce_power.c | 26 +++++++----------- arch/powerpc/mm/slb.c | 39 +++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 2f74bdc805e0..b3520b549cba 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -497,6 +497,9 @@ extern void hpte_init_native(void); extern void slb_initialize(void); extern void slb_flush_and_rebolt(void); +void slb_flush_all_realmode(void); +void __slb_restore_bolted_realmode(void); +void slb_restore_bolted_realmode(void); extern void slb_vmalloc_update(void); extern void slb_set_size(u16 size); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index d6756af6ec78..3497c8329c1d 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -62,11 +62,8 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) #ifdef CONFIG_PPC_BOOK3S_64 static void flush_and_reload_slb(void) { - struct slb_shadow *slb; - unsigned long i, n; - /* Invalidate all SLBs */ - asm volatile("slbmte %0,%0; slbia" : : "r" (0)); + slb_flush_all_realmode(); #ifdef CONFIG_KVM_BOOK3S_HANDLER /* @@ -76,22 +73,17 @@ static void flush_and_reload_slb(void) if (get_paca()->kvm_hstate.in_guest) return; #endif - - /* For host kernel, reload the SLBs from shadow SLB buffer. */ - slb = get_slb_shadow(); - if (!slb) + if (early_radix_enabled()) return; - n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); - - /* Load up the SLB entries from shadow SLB */ - for (i = 0; i < n; i++) { - unsigned long rb = be64_to_cpu(slb->save_area[i].esid); - unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); + /* + * This probably shouldn't happen, but it may be possible it's + * called in early boot before SLB shadows are allocated. + */ + if (!get_slb_shadow()) + return; - rb = (rb & ~0xFFFul) | i; - asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); - } + slb_restore_bolted_realmode(); } #endif diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index cb796724a6fc..0b095fa54049 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -90,6 +90,45 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, : "memory" ); } +/* + * Insert bolted entries into SLB (which may not be empty, so don't clear + * slb_cache_ptr). + */ +void __slb_restore_bolted_realmode(void) +{ + struct slb_shadow *p = get_slb_shadow(); + enum slb_index index; + + /* No isync needed because realmode. */ + for (index = 0; index < SLB_NUM_BOLTED; index++) { + asm volatile("slbmte %0,%1" : + : "r" (be64_to_cpu(p->save_area[index].vsid)), + "r" (be64_to_cpu(p->save_area[index].esid))); + } +} + +/* + * Insert the bolted entries into an empty SLB. + * This is not the same as rebolt because the bolted segments are not + * changed, just loaded from the shadow area. + */ +void slb_restore_bolted_realmode(void) +{ + __slb_restore_bolted_realmode(); + get_paca()->slb_cache_ptr = 0; +} + +/* + * This flushes all SLB entries including 0, so it must be realmode. + */ +void slb_flush_all_realmode(void) +{ + /* + * This flushes all SLB entries including 0, so it must be realmode. + */ + asm volatile("slbmte %0,%0; slbia" : : "r" (0)); +} + static void __slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED -- cgit From f405b510c93eeb7390d0e2c6ef8d12af9a33021a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Aug 2018 19:06:42 +0530 Subject: powerpc/mm/hash: Remove unnecessary do { } while(0) loop Avoid coverity false warnings like: *** CID 187347: Control flow issues (UNREACHABLE) /arch/powerpc/mm/hash_native_64.c: 819 in native_flush_hash_range() 813 slot += hidx & _PTEIDX_GROUP_IX; 814 hptep = htab_address + slot; 815 want_v = hpte_encode_avpn(vpn, psize, ssize); 816 hpte_v = hpte_get_old_v(hptep); 817 818 if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) >>> CID 187347: Control flow issues (UNREACHABLE) Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-64k.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index c81793d47af9..f82ee8a3b561 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -137,10 +137,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); shift = mmu_psize_defs[psize].shift; \ for (index = 0; vpn < __end; index++, \ vpn += (1L << (shift - VPN_SHIFT))) { \ - if (!__split || __rpte_sub_valid(rpte, index)) \ - do { + if (!__split || __rpte_sub_valid(rpte, index)) -#define pte_iterate_hashed_end() } while(0); } } while(0) +#define pte_iterate_hashed_end() } } while(0) #define pte_pagesize_index(mm, addr, pte) \ (((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) -- cgit From f7a6947cd49b7ff4e03f1b4f7e7b223003d752ca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 10 Jul 2018 16:20:56 +1000 Subject: powerpc/uaccess: Enable get_user(u64, *p) on 32-bit Currently if you build a 32-bit powerpc kernel and use get_user() to load a u64 value it will fail to build with eg: kernel/rseq.o: In function `rseq_get_rseq_cs': kernel/rseq.c:123: undefined reference to `__get_user_bad' This is hitting the check in __get_user_size() that makes sure the size we're copying doesn't exceed the size of the destination: #define __get_user_size(x, ptr, size, retval) do { retval = 0; __chk_user_ptr(ptr); if (size > sizeof(x)) (x) = __get_user_bad(); Which doesn't immediately make sense because the size of the destination is u64, but it's not really, because __get_user_check() etc. internally create an unsigned long and copy into that: #define __get_user_check(x, ptr, size) ({ long __gu_err = -EFAULT; unsigned long __gu_val = 0; The problem being that on 32-bit unsigned long is not big enough to hold a u64. We can fix this with a trick from hpa in the x86 code, we statically check the type of x and set the type of __gu_val to either unsigned long or unsigned long long. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/uaccess.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 643cfbd5bcb5..bac225bb7f64 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -249,10 +249,17 @@ do { \ } \ } while (0) +/* + * This is a type: either unsigned long, if the argument fits into + * that type, or otherwise unsigned long long. + */ +#define __long_type(x) \ + __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) + #define __get_user_nocheck(x, ptr, size) \ ({ \ long __gu_err; \ - unsigned long __gu_val; \ + __long_type(*(ptr)) __gu_val; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ @@ -266,7 +273,7 @@ do { \ #define __get_user_check(x, ptr, size) \ ({ \ long __gu_err = -EFAULT; \ - unsigned long __gu_val = 0; \ + __long_type(*(ptr)) __gu_val = 0; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ @@ -280,7 +287,7 @@ do { \ #define __get_user_nosleep(x, ptr, size) \ ({ \ long __gu_err; \ - unsigned long __gu_val; \ + __long_type(*(ptr)) __gu_val; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ barrier_nospec(); \ -- cgit From a2dc009afa9ae8b92305be7728676562a104cb40 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 13 Aug 2018 11:14:57 +0530 Subject: powerpc/mm/book3s/radix: Add mapping statistics Add statistics that show how memory is mapped within the kernel linear mapping. This is similar to commit 37cd944c8d8f ("s390/pgtable: add mapping statistics") We don't do this with Hash translation mode. Hash uses one size (mmu_linear_psize) to map the kernel linear mapping and we print the linear psize during boot as below. "Page orders: linear mapping = 24, virtual = 16, io = 16, vmemmap = 24" A sample output looks like: DirectMap4k: 0 kB DirectMap64k: 18432 kB DirectMap2M: 1030144 kB DirectMap1G: 11534336 kB Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 7 +++++++ arch/powerpc/include/asm/book3s/64/radix.h | 3 +++ arch/powerpc/mm/pgtable-book3s64.c | 22 ++++++++++++++++++++++ arch/powerpc/mm/pgtable-radix.c | 19 ++++++++++++++----- 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 76234a14b97d..391ed2c3b697 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -227,4 +227,11 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, #define check_pgt_cache() do { } while (0) +extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; +static inline void update_page_count(int psize, long count) +{ + if (IS_ENABLED(CONFIG_PROC_FS)) + atomic_long_add(count, &direct_pages_count[psize]); +} + #endif /* _ASM_POWERPC_BOOK3S_64_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 77440e837869..7d1a3d1543fc 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -32,6 +32,9 @@ #define RADIX_PUD_BAD_BITS 0x60000000000000e0UL #define RADIX_PGD_BAD_BITS 0x60000000000000e0UL +#define RADIX_PMD_SHIFT (PAGE_SHIFT + RADIX_PTE_INDEX_SIZE) +#define RADIX_PUD_SHIFT (RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE) +#define RADIX_PGD_SHIFT (RADIX_PUD_SHIFT + RADIX_PUD_INDEX_SIZE) /* * Size of EA range mapped by our pagetables. */ diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 78d0b3d5ebad..01d7c0f7c4f0 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -455,3 +455,25 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) return pgtable_free(table, index); } #endif + +#ifdef CONFIG_PROC_FS +atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; + +void arch_report_meminfo(struct seq_file *m) +{ + /* + * Hash maps the memory with one size mmu_linear_psize. + * So don't bother to print these on hash + */ + if (!radix_enabled()) + return; + seq_printf(m, "DirectMap4k: %8lu kB\n", + atomic_long_read(&direct_pages_count[MMU_PAGE_4K]) << 2); + seq_printf(m, "DirectMap64k: %8lu kB\n", + atomic_long_read(&direct_pages_count[MMU_PAGE_64K]) << 6); + seq_printf(m, "DirectMap2M: %8lu kB\n", + atomic_long_read(&direct_pages_count[MMU_PAGE_2M]) << 11); + seq_printf(m, "DirectMap1G: %8lu kB\n", + atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20); +} +#endif /* CONFIG_PROC_FS */ diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index bba168d02235..7be99fd9af15 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -267,6 +267,7 @@ static int __meminit create_physical_mapping(unsigned long start, #else int split_text_mapping = 0; #endif + int psize; start = _ALIGN_UP(start, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { @@ -280,13 +281,17 @@ static int __meminit create_physical_mapping(unsigned long start, retry: if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && mmu_psize_defs[MMU_PAGE_1G].shift && - PUD_SIZE <= max_mapping_size) + PUD_SIZE <= max_mapping_size) { mapping_size = PUD_SIZE; - else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && - mmu_psize_defs[MMU_PAGE_2M].shift) + psize = MMU_PAGE_1G; + } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && + mmu_psize_defs[MMU_PAGE_2M].shift) { mapping_size = PMD_SIZE; - else + psize = MMU_PAGE_2M; + } else { mapping_size = PAGE_SIZE; + psize = mmu_virtual_psize; + } if (split_text_mapping && (mapping_size == PUD_SIZE) && (addr <= __pa_symbol(__init_begin)) && @@ -297,8 +302,10 @@ retry: if (split_text_mapping && (mapping_size == PMD_SIZE) && (addr <= __pa_symbol(__init_begin)) && - (addr + mapping_size) >= __pa_symbol(_stext)) + (addr + mapping_size) >= __pa_symbol(_stext)) { mapping_size = PAGE_SIZE; + psize = mmu_virtual_psize; + } if (mapping_size != previous_size) { print_mapping(start, addr, previous_size); @@ -316,6 +323,8 @@ retry: rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end); if (rc) return rc; + + update_page_count(psize, 1); } print_mapping(start, addr, mapping_size); -- cgit