From 8f36479d0eda3c078ee4e1ec79ea9a2650cd0996 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Mon, 17 Jul 2017 16:12:43 +0300 Subject: powerpc: allow compiling with GENERIC_MSI_IRQ_DOMAIN This allows building powerpc with the GENERIC_MSI_IRQ_DOMAIN Kconfig by enabling the asm-generic msi.h in Kbuild. Without this, there's a compilation error [1] because powerpc, as most arches, doesn't provide an asm/msi.h. [1] In file included from ./include/linux/kvm_host.h:20:0, from ./arch/powerpc/include/asm/kvm_ppc.h:30, from arch/powerpc/kernel/dbell.c:20: ./include/linux/msi.h:195:21: fatal error: asm/msi.h: No such file or directory Signed-off-by: Laurentiu Tudor Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 5c4fbc80dc6c..2542ea15d338 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h generic-y += vtime.h +generic-y += msi.h -- cgit From 28a5db0061014c8afbbb98560cf420c29bc4d8e1 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 19 Jul 2017 03:06:32 +0530 Subject: powerpc/powernv: Add IMC OPAL APIs In-Memory Collection (IMC) counters are performance monitoring infrastructure. These counters need special sequence of SCOMs to init/start/stop which is handled by OPAL. And OPAL provides three APIs to init and control these IMC engines. OPAL API documentation: https://github.com/open-power/skiboot/blob/master/doc/opal-api/opal-imc-counters.rst Patch updates the kernel side powernv platform code to support the new OPAL APIs Signed-off-by: Hemant Kumar Signed-off-by: Anju T Sudhakar Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 12 +++++++++++- arch/powerpc/include/asm/opal.h | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 3130a73652c7..7df005965634 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -190,7 +190,10 @@ #define OPAL_NPU_INIT_CONTEXT 146 #define OPAL_NPU_DESTROY_CONTEXT 147 #define OPAL_NPU_MAP_LPAR 148 -#define OPAL_LAST 148 +#define OPAL_IMC_COUNTERS_INIT 149 +#define OPAL_IMC_COUNTERS_START 150 +#define OPAL_IMC_COUNTERS_STOP 151 +#define OPAL_LAST 151 /* Device tree flags */ @@ -1084,6 +1087,13 @@ enum { XIVE_DUMP_EMU_STATE = 5, }; +/* "type" argument options for OPAL_IMC_COUNTERS_* calls */ +enum { + OPAL_IMC_COUNTERS_NEST = 1, + OPAL_IMC_COUNTERS_CORE = 2, +}; + + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 588fb1c23af9..6b8513c3ad40 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -268,6 +268,11 @@ int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_imc_counters_init(uint32_t type, uint64_t address, + uint64_t cpu_pir); +int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir); +int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir); + /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); -- cgit From 8f95faaac56c18b32d0e23ace55417a440abdb7e Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 19 Jul 2017 03:06:33 +0530 Subject: powerpc/powernv: Detect and create IMC device Code to create platform device for the In-Memory Collection (IMC) counters. Platform devices are created based on the IMC compatibility. New header file created to contain the data structures and macros needed for In-Memory Collection (IMC) counter pmu devices. The device tree for IMC counters starts at the node "imc-counters". This node contains all the IMC PMU nodes and event nodes for these IMC PMUs. Device probe() parses the device to locate three possible IMC device types (Nest/Core/Thread). Function then branch to parse each unit nodes to populate vital information such as device memory sizes, event nodes information, base address for reserve memory access (if any) and so on. Simple bare-minimum shutdown function added which only "stops" the engines. Signed-off-by: Anju T Sudhakar Signed-off-by: Hemant Kumar Signed-off-by: Madhavan Srinivasan [mpe: Fix build with CONFIG_PERF_EVENTS=n] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/imc-pmu.h | 128 +++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 arch/powerpc/include/asm/imc-pmu.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h new file mode 100644 index 000000000000..7f74c282710f --- /dev/null +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -0,0 +1,128 @@ +#ifndef __ASM_POWERPC_IMC_PMU_H +#define __ASM_POWERPC_IMC_PMU_H + +/* + * IMC Nest Performance Monitor counter support. + * + * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. + * (C) 2017 Anju T Sudhakar, IBM Corporation. + * (C) 2017 Hemant K Shaw, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. + */ + +#include +#include +#include +#include +#include + +/* + * For static allocation of some of the structures. + */ +#define IMC_MAX_PMUS 32 + +/* + * Compatibility macros for IMC devices + */ +#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters" +#define IMC_DTB_UNIT_COMPAT "ibm,imc-counters" + + +/* + * LDBAR: Counter address and Enable/Disable macro. + * perf/imc-pmu.c has the LDBAR layout information. + */ +#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL +#define THREAD_IMC_ENABLE 0x8000000000000000ULL + +/* + * Structure to hold memory address information for imc units. + */ +struct imc_mem_info { + u64 *vbase; + u32 id; +}; + +/* + * Place holder for nest pmu events and values. + */ +struct imc_events { + u32 value; + char *name; + char *unit; + char *scale; +}; + +/* Event attribute array index */ +#define IMC_FORMAT_ATTR 0 +#define IMC_EVENT_ATTR 1 +#define IMC_CPUMASK_ATTR 2 +#define IMC_NULL_ATTR 3 + +/* PMU Format attribute macros */ +#define IMC_EVENT_OFFSET_MASK 0xffffffffULL + +/* + * Device tree parser code detects IMC pmu support and + * registers new IMC pmus. This structure will hold the + * pmu functions, events, counter memory information + * and attrs for each imc pmu and will be referenced at + * the time of pmu registration. + */ +struct imc_pmu { + struct pmu pmu; + struct imc_mem_info *mem_info; + struct imc_events **events; + /* + * Attribute groups for the PMU. Slot 0 used for + * format attribute, slot 1 used for cpusmask attribute, + * slot 2 used for event attribute. Slot 3 keep as + * NULL. + */ + const struct attribute_group *attr_groups[4]; + u32 counter_mem_size; + int domain; + /* + * flag to notify whether the memory is mmaped + * or allocated by kernel. + */ + bool imc_counter_mmaped; +}; + +/* + * Structure to hold id, lock and reference count for the imc events which + * are inited. + */ +struct imc_pmu_ref { + struct mutex lock; + unsigned int id; + int refc; +}; + +/* + * In-Memory Collection Counters type. + * Data comes from Device tree. + * Three device type are supported. + */ + +enum { + IMC_TYPE_THREAD = 0x1, + IMC_TYPE_CORE = 0x4, + IMC_TYPE_CHIP = 0x10, +}; + +/* + * Domains for IMC PMUs + */ +#define IMC_DOMAIN_NEST 1 +#define IMC_DOMAIN_CORE 2 +#define IMC_DOMAIN_THREAD 3 + +extern int init_imc_pmu(struct device_node *parent, + struct imc_pmu *pmu_ptr, int pmu_id); +extern void thread_imc_disable(void); +#endif /* __ASM_POWERPC_IMC_PMU_H */ -- cgit From e1c1cfed54326fd2b17c78f0c85092167fc0783b Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 21 Jul 2017 16:11:37 +0530 Subject: powerpc/powernv: Save/Restore additional SPRs for stop4 cpuidle The stop4 idle state on POWER9 is a deep idle state which loses hypervisor resources, but whose latency is low enough that it can be exposed via cpuidle. Until now, the deep idle states which lose hypervisor resources (eg: winkle) were only exposed via CPU-Hotplug. Hence currently on wakeup from such states, barring a few SPRs which need to be restored to their older value, rest of the SPRS are reinitialized to their values corresponding to that at boot time. When stop4 is used in the context of cpuidle, we want these additional SPRs to be restored to their older value, to ensure that the context on the CPU coming back from idle is same as it was before going idle. In this patch, we define a SPR save area in PACA (since we have used up the volatile register space in the stack) and on POWER9, we restore SPRN_PID, SPRN_LDBAR, SPRN_FSCR, SPRN_HFSCR, SPRN_MMCRA, SPRN_MMCR1, SPRN_MMCR2 to the values they had before entering stop. Signed-off-by: Gautham R. Shenoy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 11 +++++++++++ arch/powerpc/include/asm/paca.h | 7 +++++++ 2 files changed, 18 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 52586f9956bb..8a174cba5567 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -67,6 +67,17 @@ #define ERR_DEEP_STATE_ESL_MISMATCH -2 #ifndef __ASSEMBLY__ +/* Additional SPRs that need to be saved/restored during stop */ +struct stop_sprs { + u64 pid; + u64 ldbar; + u64 fscr; + u64 hfscr; + u64 mmcr1; + u64 mmcr2; + u64 mmcra; +}; + extern u32 pnv_fastsleep_workaround_at_entry[]; extern u32 pnv_fastsleep_workaround_at_exit[]; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index dc88a31cc79a..04b60af027ae 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -31,6 +31,7 @@ #endif #include #include +#include register struct paca_struct *local_paca asm("r13"); @@ -183,6 +184,12 @@ struct paca_struct { struct paca_struct **thread_sibling_pacas; /* The PSSCR value that the kernel requested before going to stop */ u64 requested_psscr; + + /* + * Save area for additional SPRs that need to be + * saved/restored during cpuidle stop. + */ + struct stop_sprs stop_sprs; #endif #ifdef CONFIG_PPC_STD_MMU_64 -- cgit From a46cc7a90fd8d95bfbb2b27080efe872a1a51db4 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:05 +1000 Subject: powerpc/mm/radix: Improve TLB/PWC flushes At the moment we have to rather sub-optimal flushing behaviours: - flush_tlb_mm() will flush the PWC which is unnecessary (for example when doing a fork) - A large unmap will call flush_tlb_pwc() multiple times causing us to perform that fairly expensive operation repeatedly. This happens often in batches of 3 on every new process. So we change flush_tlb_mm() to only flush the TLB, and we use the existing "need_flush_all" flag in struct mmu_gather to indicate that the PWC needs flushing. Unfortunately, flush_tlb_range() still needs to do a full flush for now as it's used by the THP collapsing. We will fix that later. Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index cc7fbde4f53c..7196999cdc82 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -22,22 +22,20 @@ extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p) -#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); -- cgit From 424de9c6e3f89399fc11afc1f53f89c5329132da Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:06 +1000 Subject: powerpc/mm/radix: Avoid flushing the PWC on every flush_tlb_range We do that because it's used by THP pmd collapsing, so use instead a dedicated flush function. Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 7196999cdc82..9b433a624bf3 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -36,6 +36,7 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p) #endif extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); +extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); -- cgit From 870cfe77a91e91cac5937784d73b9d27b6a12296 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:26 +1000 Subject: powerpc/mm: Update definitions of DSISR bits This updates the definitions for the various DSISR bits to match both some historical stuff and to match new bits on POWER9. In addition, we define some masks corresponding to the "bad" faults on Book3S, and some masks corresponding to the bits that match between DSISR and SRR1 for a DSI and an ISI. This comes with a small code update to change the definition of DSISR_PGDIRFAULT which becomes DSISR_PRTABLE_FAULT to match architecture 3.0B Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 69 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a3b6575c7842..73be2f71dbbb 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -272,16 +272,65 @@ #define SPRN_DAR 0x013 /* Data Address Register */ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ -#define DSISR_NOHPTE 0x40000000 /* no translation found */ -#define DSISR_PROTFAULT 0x08000000 /* protection fault */ -#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */ -#define DSISR_ISSTORE 0x02000000 /* access was a store */ -#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ -#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */ -#define DSISR_KEYFAULT 0x00200000 /* Key fault */ -#define DSISR_UNSUPP_MMU 0x00080000 /* Unsupported MMU config */ -#define DSISR_SET_RC 0x00040000 /* Failed setting of R/C bits */ -#define DSISR_PGDIRFAULT 0x00020000 /* Fault on page directory */ +#define DSISR_BAD_DIRECT_ST 0x80000000 /* Obsolete: Direct store error */ +#define DSISR_NOHPTE 0x40000000 /* no translation found */ +#define DSISR_ATTR_CONFLICT 0x20000000 /* P9: Process vs. Partition attr */ +#define DSISR_NOEXEC_OR_G 0x10000000 /* Alias of SRR1 bit, see below */ +#define DSISR_PROTFAULT 0x08000000 /* protection fault */ +#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */ +#define DSISR_ISSTORE 0x02000000 /* access was a store */ +#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ +#define DSISR_NOSEGMENT 0x00200000 /* STAB miss (unsupported) */ +#define DSISR_KEYFAULT 0x00200000 /* Storage Key fault */ +#define DSISR_BAD_EXT_CTRL 0x00100000 /* Obsolete: External ctrl error */ +#define DSISR_UNSUPP_MMU 0x00080000 /* P9: Unsupported MMU config */ +#define DSISR_SET_RC 0x00040000 /* P9: Failed setting of R/C bits */ +#define DSISR_PRTABLE_FAULT 0x00020000 /* P9: Fault on process table */ +#define DSISR_ICSWX_NO_CT 0x00004000 /* P7: icswx unavailable cp type */ +#define DSISR_BAD_COPYPASTE 0x00000008 /* P9: Copy/Paste on wrong memtype */ +#define DSISR_BAD_AMO 0x00000004 /* P9: Incorrect AMO opcode */ +#define DSISR_BAD_CI_LDST 0x00000002 /* P8: Bad HV CI load/store */ + +/* + * DSISR_NOEXEC_OR_G doesn't actually exist. This bit is always + * 0 on DSIs. However, on ISIs, the corresponding bit in SRR1 + * indicates an attempt at executing from a no-execute PTE + * or segment or from a guarded page. + * + * We add a definition here for completeness as we alias + * DSISR and SRR1 in do_page_fault. + */ + +/* + * DSISR bits that are treated as a fault. Any bit set + * here will skip hash_page, and cause do_page_fault to + * trigger a SIGBUS or SIGSEGV: + */ +#define DSISR_BAD_FAULT_32S (DSISR_BAD_DIRECT_ST | \ + DSISR_BADACCESS | \ + DSISR_BAD_EXT_CTRL) +#define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \ + DSISR_ATTR_CONFLICT | \ + DSISR_KEYFAULT | \ + DSISR_UNSUPP_MMU | \ + DSISR_PRTABLE_FAULT | \ + DSISR_ICSWX_NO_CT | \ + DSISR_BAD_COPYPASTE | \ + DSISR_BAD_AMO | \ + DSISR_BAD_CI_LDST) +/* + * These bits are equivalent in SRR1 and DSISR for 0x400 + * instruction access interrupts on Book3S + */ +#define DSISR_SRR1_MATCH_32S (DSISR_NOHPTE | \ + DSISR_NOEXEC_OR_G | \ + DSISR_PROTFAULT) +#define DSISR_SRR1_MATCH_64S (DSISR_SRR1_MATCH_32S | \ + DSISR_KEYFAULT | \ + DSISR_UNSUPP_MMU | \ + DSISR_SET_RC | \ + DSISR_PRTABLE_FAULT) + #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ #define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */ -- cgit From 6ff4d3e9665274b69cff47f64cc20183465a1de2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:46 +1000 Subject: powerpc: Remove old unused icswx based coprocessor support We have a whole pile of unused code to maintain the ACOP register, allocate coprocessor PIDs and handle ACOP faults. This mechanism was used for the HFI adapter on POWER7 which is dead and gone and whose driver never went upstream. It was used on some A2 core based stuff that also never saw the light of day. Take out all that code. There is still some POWER8 coprocessor code that uses icswx but it's kernel only and thus doesn't use any of that infrastructure. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 5 ----- arch/powerpc/include/asm/mmu_context.h | 6 ------ arch/powerpc/include/asm/reg_booke.h | 3 --- 3 files changed, 14 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 77529a3e3811..cbf1945f4338 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -96,11 +96,6 @@ typedef struct { #ifdef CONFIG_PPC_SUBPAGE_PROT struct subpage_prot_table spt; #endif /* CONFIG_PPC_SUBPAGE_PROT */ -#ifdef CONFIG_PPC_ICSWX - struct spinlock *cop_lockp; /* guard acop and cop_pid */ - unsigned long acop; /* mask of enabled coprocessor types */ - unsigned int cop_pid; /* pid value used with coprocessors */ -#endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_64K_PAGES /* for 4K PTE fragment support */ void *pte_frag; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index da7e9432fa8f..eb749c86dca5 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -96,12 +96,6 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, if (prev == next) return; -#ifdef CONFIG_PPC_ICSWX - /* Switch coprocessor context only if prev or next uses a coprocessor */ - if (prev->context.acop || next->context.acop) - switch_cop(next); -#endif /* CONFIG_PPC_ICSWX */ - /* We must stop all altivec streams before changing the HW * context */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 737e012ef56e..eb2a33d5df26 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -221,10 +221,7 @@ #define SPRN_CSRR0 SPRN_SRR2 /* Critical Save and Restore Register 0 */ #define SPRN_CSRR1 SPRN_SRR3 /* Critical Save and Restore Register 1 */ #endif - -#ifdef CONFIG_PPC_ICSWX #define SPRN_HACOP 0x15F /* Hypervisor Available Coprocessor Register */ -#endif /* Bit definitions for CCR1. */ #define CCR1_DPC 0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */ -- cgit From 2552910084a5e12e280caf082ab01468e187a064 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 4 Aug 2017 11:55:14 +0200 Subject: powerpc/powernv: Enable PCI peer-to-peer P9 has support for PCI peer-to-peer, enabling a device to write in the MMIO space of another device directly, without interrupting the CPU. This patch adds support for it on powernv, by adding a new API to be called by drivers. The pnv_pci_set_p2p(...) call configures an 'initiator', i.e the device which will issue the MMIO operation, and a 'target', i.e. the device on the receiving side. P9 really only supports MMIO stores for the time being but that's expected to change in the future, so the API allows to define both load and store operations. /* PCI p2p descriptor */ #define OPAL_PCI_P2P_ENABLE 0x1 #define OPAL_PCI_P2P_LOAD 0x2 #define OPAL_PCI_P2P_STORE 0x4 int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc) It uses a new OPAL call, as the configuration magic is done on the PHBs by skiboot. Signed-off-by: Frederic Barrat Reviewed-by: Russell Currey [mpe: Drop unrelated OPAL calls, s/uint64_t/u64/, minor formatting] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 8 +++++++- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/include/asm/pnv-pci.h | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 7df005965634..ced1ef21e981 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -193,7 +193,8 @@ #define OPAL_IMC_COUNTERS_INIT 149 #define OPAL_IMC_COUNTERS_START 150 #define OPAL_IMC_COUNTERS_STOP 151 -#define OPAL_LAST 151 +#define OPAL_PCI_SET_P2P 157 +#define OPAL_LAST 157 /* Device tree flags */ @@ -1094,6 +1095,11 @@ enum { }; +/* PCI p2p descriptor */ +#define OPAL_PCI_P2P_ENABLE 0x1 +#define OPAL_PCI_P2P_LOAD 0x2 +#define OPAL_PCI_P2P_STORE 0x4 + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6b8513c3ad40..5a715e66f910 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -267,6 +267,8 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id); int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, + uint64_t desc, uint16_t pe_number); int64_t opal_imc_counters_init(uint32_t type, uint64_t address, uint64_t cpu_pir); diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index de9681034353..3e5cf251ad9a 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -26,6 +26,8 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state); extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state); extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg); +extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, + u64 desc); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, -- cgit From e66ca3db5917f4bcad039d3a3df9f1003797c249 Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Fri, 4 Aug 2017 11:12:18 +1000 Subject: powerpc/powernv: Use darn instruction for get_random_seed() on Power9 This adds powernv_get_random_darn() which utilises the darn instruction, introduced in ISA v3.0/POWER9. The darn instruction can potentially return an error, which is supported by the get_random_seed() API, in normal usage if we see an error we just return that to the caller. However when detecting whether darn is functional at boot we try up to 10 times, before deciding that darn doesn't work and failing the registration of get_random_seed(). That way an intermittent failure at boot doesn't deprive the system of randomness until the next reboot. Signed-off-by: Matt Brown [mpe: Move init into a function, tweak change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index fa9ebaead91e..041ba15aa2b9 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -193,6 +193,7 @@ #define PPC_INST_CLRBHRB 0x7c00035c #define PPC_INST_COPY 0x7c20060c #define PPC_INST_CP_ABORT 0x7c00068c +#define PPC_INST_DARN 0x7c0005e6 #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe #define PPC_INST_DCBAL 0x7c2005ec @@ -395,6 +396,9 @@ #define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT) #define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \ ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN | \ + ___PPC_RT(t) | \ + (((l) & 0x3) << 16)) #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ -- cgit From 63ee9b2ff9d306efaa61b04b8710fafe339ae441 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 1 Aug 2017 20:29:22 +1000 Subject: powerpc/mm/book3s64: Make KERN_IO_START a variable Currently KERN_IO_START is defined as: #define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1)) Although it looks like a constant, both the components are actually variables, to allow us to have a different value between Radix and Hash with a single kernel. However that still requires both Radix and Hash to place the kernel IO region at the same location relative to the start and end of the kernel virtual region (namely 1/2 way through it), and we'd like to change that. So split KERN_IO_START out into its own variable, and initialise it for Radix and Hash. In the medium term we should be able to reconsolidate this, by doing a more involved rearrangement of the location of the regions. Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 2 ++ arch/powerpc/include/asm/book3s/64/pgtable.h | 3 ++- arch/powerpc/include/asm/book3s/64/radix.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 36fc7bfe9e11..d613653ed5b9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -51,6 +51,8 @@ #define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1) #define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) +#define H_KERN_IO_START H_VMALLOC_END + /* * Region IDs */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index d1da415e283c..18a8580d3ddc 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -272,8 +272,10 @@ extern unsigned long __vmalloc_end; extern unsigned long __kernel_virt_start; extern unsigned long __kernel_virt_size; +extern unsigned long __kernel_io_start; #define KERN_VIRT_START __kernel_virt_start #define KERN_VIRT_SIZE __kernel_virt_size +#define KERN_IO_START __kernel_io_start extern struct page *vmemmap; extern unsigned long ioremap_bot; extern unsigned long pci_io_base; @@ -298,7 +300,6 @@ extern unsigned long pci_io_base; * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE */ -#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1)) #define FULL_IO_SIZE 0x80000000ul #define ISA_IO_BASE (KERN_IO_START) #define ISA_IO_END (KERN_IO_START + 0x10000ul) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 544440b5aff3..1e5ba94e62ef 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -110,6 +110,8 @@ */ #define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) +#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) + #ifndef __ASSEMBLY__ #define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) #define RADIX_PMD_TABLE_SIZE (sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE) -- cgit From 21a0e8c14bf61472723d2acc83f98ab35ff321b4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 1 Aug 2017 20:29:24 +1000 Subject: powerpc/mm/hash64: Make vmalloc 56T on hash On 64-bit book3s, with the hash MMU, we currently define the kernel virtual space (vmalloc, ioremap etc.), to be 16T in size. This is a leftover from pre v3.7 when our user VM was also 16T. Of that 16T we split it 50/50, with half used for PCI IO and ioremap and the other 8T for vmalloc. We never bothered to make it any bigger because 8T of vmalloc ought to be enough for anybody. But it turns out that's not true, the per cpu allocator wants large amounts of vmalloc space, not to make large allocations, but to allow a large stride between allocations, because we use pcpu_embed_first_chunk(). With a bit of juggling we can increase the entire kernel virtual space to 64T. The only real complication is the check of the address in the SLB miss handler, see the comment in the code. Although we could continue to split virtual space 50/50 as we do now, no one seems to be running out of PCI IO or ioremap space. So instead keep that as 8T, and use the remaining 56T for vmalloc. In future we should be able to increase the kernel virtual space to 512T, the code already supports that, it just needs testing on older hardware. Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index d613653ed5b9..f88452019114 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -40,7 +40,7 @@ * Define the address range of the kernel non-linear virtual area */ #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) -#define H_KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) +#define H_KERN_VIRT_SIZE ASM_CONST(0x0000400000000000) /* 64T */ /* * The vmalloc space starts at the beginning of that region, and @@ -48,7 +48,7 @@ * (we keep a quarter for the virtual memmap) */ #define H_VMALLOC_START H_KERN_VIRT_START -#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1) +#define H_VMALLOC_SIZE ASM_CONST(0x380000000000) /* 56T */ #define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) #define H_KERN_IO_START H_VMALLOC_END -- cgit From 8a583c0a8d316d8ea52ea78491174ab1a3e9ef9d Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sat, 5 Aug 2017 19:55:11 +0200 Subject: powerpc: Fix invalid use of register expressions binutils >= 2.26 now warns about misuse of register expressions in assembler operands that are actually literals, for example: arch/powerpc/kernel/entry_64.S:535: Warning: invalid register expression In practice these are almost all uses of r0 that should just be a literal 0. Signed-off-by: Andreas Schwab [mpe: Mention r0 is almost always the culprit, fold in purgatory change] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 6baeeb9acd0d..daeda2bbe12a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -439,7 +439,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) .machine push ; \ .machine "power4" ; \ lis scratch,0x60000000@h; \ - dcbt r0,scratch,0b01010; \ + dcbt 0,scratch,0b01010; \ .machine pop /* -- cgit From ca41ad4377072e3e51593a2a9593c6cdbf4a5c0d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 Aug 2017 22:00:53 +1000 Subject: powerpc: Add irq accounting for system reset interrupts Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hardirq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 8add8b861e8d..64b73b03d473 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -12,6 +12,7 @@ typedef struct { unsigned int mce_exceptions; unsigned int spurious_irqs; unsigned int hmi_exceptions; + unsigned int sreset_irqs; #ifdef CONFIG_PPC_DOORBELL unsigned int doorbell_irqs; #endif -- cgit From 04019bf8ebb3c8cd66d75046054aeb264ba2db54 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 Aug 2017 22:00:54 +1000 Subject: powerpc: Add irq accounting for watchdog interrupts This adds an irq counter for the watchdog soft-NMI. This interrupt only fires when interrupts are soft-disabled, so it will not increment much even when the watchdog is running. However it's useful for debugging and sanity checking. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hardirq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 64b73b03d473..c97603d617e3 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -13,6 +13,9 @@ typedef struct { unsigned int spurious_irqs; unsigned int hmi_exceptions; unsigned int sreset_irqs; +#ifdef CONFIG_PPC_WATCHDOG + unsigned int soft_nmi_irqs; +#endif #ifdef CONFIG_PPC_DOORBELL unsigned int doorbell_irqs; #endif -- cgit From cb8b340de21e1c57e1c6d4f26ccc4af46a3ed559 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 10 Aug 2017 09:01:18 +0530 Subject: powerpc/powernv: Add support for powercap framework Adds a generic powercap framework to change the system powercap inband through OPAL-OCC command/response interface. Signed-off-by: Shilpasri G Bhat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 3 +++ arch/powerpc/include/asm/opal.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index ced1ef21e981..b87305b22746 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -42,6 +42,7 @@ #define OPAL_I2C_STOP_ERR -24 #define OPAL_XIVE_PROVISIONING -31 #define OPAL_XIVE_FREE_ACTIVE -32 +#define OPAL_TIMEOUT -33 /* API Tokens (in r0) */ #define OPAL_INVALID_CALL -1 @@ -193,6 +194,8 @@ #define OPAL_IMC_COUNTERS_INIT 149 #define OPAL_IMC_COUNTERS_START 150 #define OPAL_IMC_COUNTERS_STOP 151 +#define OPAL_GET_POWERCAP 152 +#define OPAL_SET_POWERCAP 153 #define OPAL_PCI_SET_P2P 157 #define OPAL_LAST 157 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 5a715e66f910..6f09ab74aa7b 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -275,6 +275,9 @@ int64_t opal_imc_counters_init(uint32_t type, uint64_t address, int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir); int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir); +int opal_get_powercap(u32 handle, int token, u32 *pcap); +int opal_set_powercap(u32 handle, int token, u32 pcap); + /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); @@ -352,6 +355,8 @@ static inline int opal_get_async_rc(struct opal_msg msg) void opal_wake_poller(void); +void opal_powercap_init(void); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ -- cgit From 8e84b2d1f0f6a00b6476790f7bce6dcbffe91980 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 10 Aug 2017 09:01:19 +0530 Subject: powerpc/powernv: Add support to set power-shifting-ratio This patch adds support to set power-shifting-ratio which hints the firmware how to distribute/throttle power between different entities in a system (e.g CPU v/s GPU). This ratio is used by OCC for power capping algorithm. Signed-off-by: Shilpasri G Bhat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 2 ++ arch/powerpc/include/asm/opal.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index b87305b22746..0cb7d110c0b4 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -196,6 +196,8 @@ #define OPAL_IMC_COUNTERS_STOP 151 #define OPAL_GET_POWERCAP 152 #define OPAL_SET_POWERCAP 153 +#define OPAL_GET_POWER_SHIFT_RATIO 154 +#define OPAL_SET_POWER_SHIFT_RATIO 155 #define OPAL_PCI_SET_P2P 157 #define OPAL_LAST 157 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6f09ab74aa7b..d87ffcb16b61 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -277,6 +277,8 @@ int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir); int opal_get_powercap(u32 handle, int token, u32 *pcap); int opal_set_powercap(u32 handle, int token, u32 pcap); +int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr); +int opal_set_power_shift_ratio(u32 handle, int token, u32 psr); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, @@ -356,6 +358,7 @@ static inline int opal_get_async_rc(struct opal_msg msg) void opal_wake_poller(void); void opal_powercap_init(void); +void opal_psr_init(void); #endif /* __ASSEMBLY__ */ -- cgit From bf9571550f529335caa59f41827d180908759916 Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Thu, 10 Aug 2017 09:01:20 +0530 Subject: powerpc/powernv: Add support to clear sensor groups data Adds support for clearing different sensor groups. OCC inband sensor groups like CSM, Profiler, Job Scheduler can be cleared using this driver. The min/max of all sensors belonging to these sensor groups will be cleared. Signed-off-by: Shilpasri G Bhat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 1 + arch/powerpc/include/asm/opal.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0cb7d110c0b4..450a60b81d2a 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -198,6 +198,7 @@ #define OPAL_SET_POWERCAP 153 #define OPAL_GET_POWER_SHIFT_RATIO 154 #define OPAL_SET_POWER_SHIFT_RATIO 155 +#define OPAL_SENSOR_GROUP_CLEAR 156 #define OPAL_PCI_SET_P2P 157 #define OPAL_LAST 157 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index d87ffcb16b61..97ff192f5cfb 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -279,6 +279,7 @@ int opal_get_powercap(u32 handle, int token, u32 *pcap); int opal_set_powercap(u32 handle, int token, u32 pcap); int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr); int opal_set_power_shift_ratio(u32 handle, int token, u32 psr); +int opal_sensor_group_clear(u32 group_hndl, int token); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, @@ -359,6 +360,7 @@ void opal_wake_poller(void); void opal_powercap_init(void); void opal_psr_init(void); +void opal_sensor_groups_init(void); #endif /* __ASSEMBLY__ */ -- cgit From d30a5a5262ca64d58aa07fb2ecd7f992df83b4bc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 8 Aug 2017 16:39:25 +1000 Subject: powerpc/traps: Use SRR1 defines for program check reasons Currently we open code the reason codes for program checks. Instead use the existing SRR1 defines. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 73be2f71dbbb..c0600e9e0ff5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -724,6 +724,7 @@ * may not be recoverable */ #define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ #define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ +#define SRR1_PROGTM 0x00200000 /* TM Bad Thing */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ #define SRR1_PROGILL 0x00080000 /* Illegal instruction */ #define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */ -- cgit From 72e4b2cdf07b4c43115f058ed74d694eab5d6454 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2017 13:58:50 +0200 Subject: powerpc/time: refactor MFTB() to limit number of ifdefs The 8xx cannot access the TBL and TBU registers using mfspr/mtspr It must be accessed using mftb/mftbu Due to this, there is a number of places with #ifdef CONFIG_8xx This patch defines new macros MFTBL(x) and MFTBU(x) on the same model as MFTB(x) and tries to make use of them as much as possible. In arch/powerpc/include/asm/timex.h, we also remove the ifdef for the asm() operands as the compiler doesn't mind unused operands Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 12 +++++++++--- arch/powerpc/include/asm/timex.h | 4 ---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index daeda2bbe12a..1d7ff3156a97 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -378,10 +378,16 @@ BEGIN_FTR_SECTION_NESTED(96); \ cmpwi dest,0; \ beq- 90b; \ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) -#elif defined(CONFIG_8xx) -#define MFTB(dest) mftb dest #else -#define MFTB(dest) mfspr dest, SPRN_TBRL +#define MFTB(dest) MFTBL(dest) +#endif + +#ifdef CONFIG_PPC_8xx +#define MFTBL(dest) mftb dest +#define MFTBU(dest) mftbu dest +#else +#define MFTBL(dest) mfspr dest, SPRN_TBRL +#define MFTBU(dest) mfspr dest, SPRN_TBRU #endif #ifndef CONFIG_SMP diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 2cf846edb3fc..b467dbcb0fb7 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -45,11 +45,7 @@ static inline cycles_t get_cycles(void) " .long 0\n" " .long 0\n" ".previous" -#ifdef CONFIG_8xx - : "=r" (ret) : "i" (CPU_FTR_601)); -#else : "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL)); -#endif return ret; #endif } -- cgit From 968159c0031ac1e07ab4426397e786c9c483f068 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2017 13:58:54 +0200 Subject: powerpc/8xx: Getting rid of remaining use of CONFIG_8xx Two config options exist to define powerpc MPC8xx: * CONFIG_PPC_8xx * CONFIG_8xx arch/powerpc/platforms/Kconfig.cputype has contained the following comment about CONFIG_8xx item for some years: "# this is temp to handle compat with arch=ppc" arch/powerpc is now the only place with remaining use of CONFIG_8xx: get rid of them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cache.h | 2 +- arch/powerpc/include/asm/cputable.h | 4 ++-- arch/powerpc/include/asm/fs_pd.h | 2 +- arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- arch/powerpc/include/asm/ppc_asm.h | 2 +- arch/powerpc/include/asm/reg.h | 10 +++++----- arch/powerpc/include/asm/timex.h | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 5a90292afbad..d122f7f957ce 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -5,7 +5,7 @@ /* bytes per L1 cache line */ -#if defined(CONFIG_8xx) || defined(CONFIG_403GCX) +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX) #define L1_CACHE_SHIFT 4 #define MAX_COPY_PREFETCH 1 #elif defined(CONFIG_PPC_E500MC) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index d02ad93bf708..a9bf921f4efc 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -513,7 +513,7 @@ enum { #else CPU_FTRS_GENERIC_32 | #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | #endif #ifdef CONFIG_40x @@ -565,7 +565,7 @@ enum { #else CPU_FTRS_GENERIC_32 & #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & #endif #ifdef CONFIG_40x diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index f79d6c74eb2a..8def56ec05c6 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -26,7 +26,7 @@ #define cpm2_unmap(addr) do {} while(0) #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #include extern immap_t __iomem *mpc8xx_immr; diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 91314268f04f..9278eaa7ca59 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -121,7 +121,7 @@ extern int icache_44x_need_flush; #include #elif defined(CONFIG_FSL_BOOKE) #include -#elif defined(CONFIG_8xx) +#elif defined(CONFIG_PPC_8xx) #include #endif diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 1d7ff3156a97..36f3e41c9fbe 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -417,7 +417,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) * and they must be used. */ -#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx) +#if !defined(CONFIG_4xx) && !defined(CONFIG_PPC_8xx) #define tlbia \ li r4,1024; \ mtctr r4; \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c0600e9e0ff5..70722e5b93e7 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -22,9 +22,9 @@ #include #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #include -#endif /* CONFIG_8xx */ +#endif /* CONFIG_PPC_8xx */ #define MSR_SF_LG 63 /* Enable 64 bit mode */ #define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */ @@ -135,7 +135,7 @@ #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) -#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx) +#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) /* Default MSR for kernel mode. */ #define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) @@ -1164,7 +1164,7 @@ #endif #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 #define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 @@ -1363,7 +1363,7 @@ static inline void msr_check_and_clear(unsigned long bits) #else /* __powerpc64__ */ -#if defined(CONFIG_8xx) +#if defined(CONFIG_PPC_8xx) #define mftbl() ({unsigned long rval; \ asm volatile("mftbl %0" : "=r" (rval)); rval;}) #define mftbu() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index b467dbcb0fb7..cb61eae5b7ed 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -29,7 +29,7 @@ static inline cycles_t get_cycles(void) ret = 0; __asm__ __volatile__( -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx "97: mftb %0\n" #else "97: mfspr %0, %2\n" -- cgit From 3ee87674e0212a152419a479dfb1eed501bab386 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2017 13:58:58 +0200 Subject: powerpc/8xx: Use symbolic PVR value For the 8xx, PVR values defined in arch/powerpc/include/asm/reg.h are nowhere used. Remove all defines and add PVR_8xx Use it in arch/powerpc/kernel/cputable.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 70722e5b93e7..c36823d64ec9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1247,10 +1247,8 @@ * differentiated by the version number in the Communication Processor * Module (CPM). */ -#define PVR_821 0x00500000 -#define PVR_823 PVR_821 -#define PVR_850 PVR_821 -#define PVR_860 PVR_821 +#define PVR_8xx 0x00500000 + #define PVR_8240 0x00810100 #define PVR_8245 0x80811014 #define PVR_8260 PVR_8240 -- cgit From 5b6c133e0801007117cf4a466cb56de86e186138 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 15 Aug 2017 20:02:56 +1000 Subject: powerpc/mm/nohash: Move definition of PGALLOC_GFP to fix build errors In some obscure Book3E configs (randconfig) we can end up missing a definition for PGALLOC_GFP in pgtable_64.c. Fix it by moving the definition to asm/pgalloc.h. Fixes: de3b87611dd1 ("powerpc/mm/book(e)(3s)/64: Add page table accounting") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 2 -- arch/powerpc/include/asm/pgalloc.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index e2329db9d6f4..1fcfa425cefa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -41,8 +41,6 @@ extern struct kmem_cache *pgtable_cache[]; pgtable_cache[(shift) - 1]; \ }) -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO - extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int); extern void pte_fragment_free(unsigned long *, int); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index d795c5d5789c..45ae1212ab8a 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -17,6 +17,8 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) } #endif /* MODULE */ +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) + #ifdef CONFIG_PPC_BOOK3S #include #else -- cgit From 3184cc4b6f6a1dc0c1745aafe2b14b1206ef3187 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Aug 2017 15:51:03 +0200 Subject: powerpc/mm: Fix kernel RAM protection after freeing unused memory on PPC32 As seen below, allthough the init sections have been freed, the associated memory area is still marked as executable in the page tables. ~ dmesg [ 5.860093] Freeing unused kernel memory: 592K (c0570000 - c0604000) ~ cat /sys/kernel/debug/kernel_page_tables ---[ Start of kernel VM ]--- 0xc0000000-0xc0497fff 4704K rw X present dirty accessed shared 0xc0498000-0xc056ffff 864K rw present dirty accessed shared 0xc0570000-0xc059ffff 192K rw X present dirty accessed shared 0xc05a0000-0xc7ffffff 125312K rw present dirty accessed shared ---[ vmalloc() Area ]--- This patch fixes that. The implementation is done by reusing the change_page_attr() function implemented for CONFIG_DEBUG_PAGEALLOC Signed-off-by: Christophe Leroy Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index afae9a336136..ab7f44475b1f 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -81,7 +81,7 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); -#ifdef CONFIG_STRICT_KERNEL_RWX +#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32) void mark_initmem_nx(void); #else static inline void mark_initmem_nx(void) { } -- cgit From 86b19520e7ef5539eb081c76fe2f5c955180205f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Aug 2017 15:51:07 +0200 Subject: powerpc/mm: declare some local functions static get_pteptr() and __mapin_ram_chunk() are only used locally, so define them static Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgtable.h | 3 --- arch/powerpc/include/asm/nohash/32/pgtable.h | 3 --- 2 files changed, 6 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 7fb755880409..17c8766777f1 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -294,9 +294,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, - pmd_t **pmdp); - int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); /* Generic accessors to PTE bits */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 9278eaa7ca59..185c6a47f9ba 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -337,9 +337,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, - pmd_t **pmdp); - int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); #endif /* !__ASSEMBLY__ */ -- cgit From 4cfac2f9c7f116af8516d0b3d0e7383189eca376 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Aug 2017 15:51:09 +0200 Subject: powerpc/mm: Simplify __set_fixmap() __set_fixmap() uses __fix_to_virt() then does the boundary checks by it self. Instead, we can use fix_to_virt() which does the verification at build time. For this, we need to use it inline so that GCC can see the real value of idx at buildtime. In the meantime, we remove the 'fixmaps' variable. This variable is set but has never been used from the beginning (commit 2c419bdeca1d9 ("[POWERPC] Port fixmap from x86 and use for kmap_atomic")) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/fixmap.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 4508b322f2cd..6c40dfda5912 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -17,6 +17,7 @@ #ifndef __ASSEMBLY__ #include #include +#include #ifdef CONFIG_HIGHMEM #include #include @@ -62,9 +63,6 @@ enum fixed_addresses { __end_of_fixed_addresses }; -extern void __set_fixmap (enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags); - #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) @@ -72,5 +70,11 @@ extern void __set_fixmap (enum fixed_addresses idx, #include +static inline void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags)); +} + #endif /* !__ASSEMBLY__ */ #endif -- cgit From ca8afd4046255ac046f8229d5159c6d213e37b22 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 12 Jul 2017 17:03:42 +0200 Subject: powerpc/hugetlb: fix page rights verification in gup_hugepte() gup_hugepte() checks if pages are present and readable, and when 'write' is set, also checks if the pages are writable. Initially this was done by checking if _PAGE_PRESENT and _PAGE_READ were set. In addition, _PAGE_WRITE was verified for write accesses. The problem is that we have to handle the three following cases: 1/ The target defines __PAGE_READ and __PAGE_WRITE 2/ The target defines __PAGE_RW 3/ The target defines __PAGE_RO In case 1/, this is obvious In case 2/, __PAGE_READ is defined as 0 and __PAGE_WRITE as __PAGE_RW so it works as well. But in case 3, __PAGE_RW is defined as 0, which means __PAGE_WRITE is 0 and then the test returns true (page writable) in all cases. A first correction was attempted in commit 6b8cb66a6a7cc ("powerpc: Fix usage of _PAGE_RO in hugepage"), but that fix is wrong: instead of checking that the page is writable when write is requested, it checks that the page is NOT writable when write is NOT requested. This patch adds a new pte_read() helper to check whether a page is readable or not. This avoids handling all possible cases in gup_hugepte(). Then gup_hugepte() is modified to use pte_present(), pte_read() and pte_write() instead of the raw flags. Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgtable.h | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +++++ arch/powerpc/include/asm/nohash/pgtable.h | 1 + 3 files changed, 7 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 17c8766777f1..4d453f979553 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -298,6 +298,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} +static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 18a8580d3ddc..1071d52f382d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -410,6 +410,11 @@ static inline int pte_write(pte_t pte) return __pte_write(pte) || pte_savedwrite(pte); } +static inline int pte_read(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_READ)); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index e5805ad78e12..17989c3d9a24 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -14,6 +14,7 @@ static inline int pte_write(pte_t pte) { return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; } +static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } -- cgit From 79cc38ded1e1ac86e69c90f604efadd50b0b3762 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 28 Jul 2017 10:31:26 +0530 Subject: powerpc/mm/hugetlb: Add support for reserving gigantic huge pages via kernel command line With commit aa888a74977a8 ("hugetlb: support larger than MAX_ORDER") we added support for allocating gigantic hugepages via kernel command line. Switch ppc64 arch specific code to use that. W.r.t FSL support, we now limit our allocation range using BOOTMEM_ALLOC_ACCESSIBLE. We use the kernel command line to do reservation of hugetlb pages on powernv platforms. On pseries hash mmu mode the supported gigantic huge page size is 16GB and that can only be allocated with hypervisor assist. For pseries the command line option doesn't do the allocation. Instead pseries does gigantic hugepage allocation based on hypervisor hint that is specified via "ibm,expected#pages" property of the memory node. Cc: Scott Wood Cc: Christophe Leroy Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +- arch/powerpc/include/asm/hugetlb.h | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 6981a52b3887..f28d21c69f79 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -468,7 +468,7 @@ extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize); int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize); -extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); +extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); #ifdef CONFIG_PPC_PSERIES diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 7f4025a6c69e..b8a0fb442c64 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -218,18 +218,4 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, } #endif /* CONFIG_HUGETLB_PAGE */ -/* - * FSL Book3E platforms require special gpage handling - the gpages - * are reserved early in the boot process by memblock instead of via - * the .dts as on IBM platforms. - */ -#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \ - defined(CONFIG_PPC_8xx)) -extern void __init reserve_hugetlb_gpages(void); -#else -static inline void reserve_hugetlb_gpages(void) -{ -} -#endif - #endif /* _ASM_POWERPC_HUGETLB_H */ -- cgit From 4ae279c2c96ab38a78b954d218790a8f6db714e5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 28 Jul 2017 10:31:27 +0530 Subject: powerpc/mm/hugetlb: Allow runtime allocation of 16G. Now that we have GIGANTIC_PAGE enabled on powerpc, use this for 16G hugepages with hash translation mode. Depending on the total system memory we have, we may be able to allocate 16G hugepages runtime. This also remove the hugetlb setup difference between hash/radix translation mode. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index 5c28bd6f2ae1..2d1ca488ca44 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -54,9 +54,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static inline bool gigantic_page_supported(void) { - if (radix_enabled()) - return true; - return false; + return true; } #endif -- cgit From 7baebe54a64af44dc34d0bd51462f09fe07a2acb Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 26 Jul 2017 21:34:30 +0800 Subject: powerpc/topology: Remove the unused parent_node() macro Commit a7be6e5a7f8d ("mm: drop useless local parameters of __register_one_node()") removes the last user of parent_node(). The parent_node() macro in POWERPC platform is unnecessary. Remove it for cleanup. Reported-by: Michael Ellerman Signed-off-by: Dou Liyang Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/topology.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index dc4e15937ccf..2d84bca8d053 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -16,8 +16,6 @@ struct device_node; #include -#define parent_node(node) (node) - #define cpumask_of_node(node) ((node) == -1 ? \ cpu_all_mask : \ node_to_cpumask_map[node]) -- cgit From 694fc88ce271fd48f7939c032c1247fef81db57f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 28 Mar 2017 01:07:40 +0530 Subject: powerpc/string: Implement optimized memset variants Based on Matthew Wilcox's patches for other architectures. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/string.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index da3cdffca440..b0e82466d4e8 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -23,6 +23,30 @@ extern void * memmove(void *,const void *,__kernel_size_t); extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); +#ifdef CONFIG_PPC64 +#define __HAVE_ARCH_MEMSET16 +#define __HAVE_ARCH_MEMSET32 +#define __HAVE_ARCH_MEMSET64 + +extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); +extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); +extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); + +static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n) +{ + return __memset16(p, v, n * 2); +} + +static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n) +{ + return __memset32(p, v, n * 4); +} + +static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) +{ + return __memset64(p, v, n * 8); +} +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_STRING_H */ -- cgit From 94171b19c3f1f4d9d4c0e3aaa1aa161def1ec7ea Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 27 Jul 2017 11:54:53 +0530 Subject: powerpc/mm: Rename find_linux_pte_or_hugepte() Add newer helpers to make the function usage simpler. It is always recommended to use find_current_mm_pte() for walking the page table. If we cannot use find_current_mm_pte(), it should be documented why the said usage of __find_linux_pte() is safe against a parallel THP split. For now we have KVM code using __find_linux_pte(). This is because kvm code ends up calling __find_linux_pte() in real mode with MSR_EE=0 but with PACA soft_enabled = 1. We may want to fix that later and make sure we keep the MSR_EE and PACA soft_enabled in sync. When we do that we can switch kvm to use find_linux_pte(). Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable.h | 10 +--------- arch/powerpc/include/asm/pte-walk.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 arch/powerpc/include/asm/pte-walk.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index afae9a336136..eb9d57defb75 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -66,16 +66,8 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif -pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *shift); -static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *shift) -{ - VM_WARN(!arch_irqs_disabled(), - "%s called with irq enabled\n", __func__); - return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); -} +/* can we use this in kvm */ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h new file mode 100644 index 000000000000..2d633e9d686c --- /dev/null +++ b/arch/powerpc/include/asm/pte-walk.h @@ -0,0 +1,35 @@ +#ifndef _ASM_POWERPC_PTE_WALK_H +#define _ASM_POWERPC_PTE_WALK_H + +#include + +/* Don't use this directly */ +extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift); + +static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift) +{ + VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); + return __find_linux_pte(pgdir, ea, is_thp, hshift); +} + +static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) +{ + pgd_t *pgdir = init_mm.pgd; + return __find_linux_pte(pgdir, ea, NULL, hshift); +} +/* + * This is what we should always use. Any other lockless page table lookup needs + * careful audit against THP split. + */ +static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hshift) +{ + VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); + VM_WARN(pgdir != current->mm->pgd, + "%s lock less page table lookup called on wrong mm\n", __func__); + return __find_linux_pte(pgdir, ea, is_thp, hshift); +} + +#endif /* _ASM_POWERPC_PTE_WALK_H */ -- cgit From fa4531f753f1c80d21b5eb86ec5c0229310c5fb0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 27 Jul 2017 11:54:54 +0530 Subject: powerpc/mm: Don't send IPI to all cpus on THP updates Now that we made sure that lockless walk of linux page table is mostly limitted to current task(current->mm->pgdir) we can update the THP update sequence to only send IPI to CPUs on which this task has run. This helps in reducing the IPI overload on systems with large number of CPUs. WRT kvm even though kvm is walking page table with vpc->arch.pgdir, it is done only on secondary CPUs and in that case we have primary CPU added to task's mm cpumask. Sending an IPI to primary will force the secondary to do a vm exit and hence this mm cpumask usage is safe here. WRT CAPI, we still end up walking linux page table with capi context MM. For now the pte lookup serialization sends an IPI to all CPUs in CPI is in use. We can further improve this by adding the CAPI interrupt handling CPU to task mm cpumask. That will be done in a later patch. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1071d52f382d..a43c60aa1c2c 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1165,6 +1165,7 @@ static inline bool arch_needs_pgtable_deposit(void) return false; return true; } +extern void serialize_against_pte_lookup(struct mm_struct *mm); static inline pmd_t pmd_mkdevmap(pmd_t pmd) -- cgit From 43ed84a891b70165a621a5c92196949efd57be39 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:27:58 +1000 Subject: powerpc/mm: Move pgdir setting into a helper Makes switch_mm_irqs_off() a bit more readable Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 2ab328f0159e..992123e4e7f6 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -77,6 +77,26 @@ extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); +#if defined(CONFIG_PPC32) +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) +{ + /* 32-bit keeps track of the current PGDIR in the thread struct */ + tsk->thread.pgdir = mm->pgd; +} +#elif defined(CONFIG_PPC_BOOK3E_64) +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) +{ + /* 64-bit Book3E keeps track of current PGD in the PACA */ + get_paca()->pgd = mm->pgd; +} +#else +static inline void switch_mm_pgdir(struct task_struct *tsk, + struct mm_struct *mm) { } +#endif + + /* * switch_mm is the entry point called from the architecture independent * code in kernel/sched/core.c @@ -111,15 +131,9 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, new_on_cpu = true; } - /* 32-bit keeps track of the current PGDIR in the thread struct */ -#ifdef CONFIG_PPC32 - tsk->thread.pgdir = next->pgd; -#endif /* CONFIG_PPC32 */ + /* Some subarchs need to track the PGD elsewhere */ + switch_mm_pgdir(tsk, next); - /* 64-bit Book3E keeps track of current PGD in the PACA */ -#ifdef CONFIG_PPC_BOOK3E_64 - get_paca()->pgd = next->pgd; -#endif /* Nothing else to do if we aren't actually switching */ if (prev == next) return; -- cgit From 058ccc3465d9b8fb34d59ca099a8a7ace1a62cdd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:27:59 +1000 Subject: powerpc/mm: Avoid double irq save/restore in activate_mm It calls switch_mm() which already does the irq save/restore these days. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 992123e4e7f6..fb99c27bbf5e 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -176,11 +176,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - unsigned long flags; - - local_irq_save(flags); switch_mm(prev, next, current); - local_irq_restore(flags); } /* We don't currently use enter_lazy_tlb() for anything */ -- cgit From a619e59c075c66e530a88e57b45bb0417e2f4fff Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:28:02 +1000 Subject: powerpc/mm: Optimize detection of thread local mm's Instead of comparing the whole CPU mask every time, let's keep a counter of how many bits are set in the mask. Thus testing for a local mm only requires testing if that counter is 1 and the current CPU bit is set in the mask. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 3 +++ arch/powerpc/include/asm/mmu_context.h | 9 +++++++++ arch/powerpc/include/asm/tlb.h | 11 ++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 1a220cdff923..c3b00e8ff791 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -83,6 +83,9 @@ typedef struct { mm_context_id_t id; u16 user_psize; /* page size index */ + /* Number of bits in the mm_cpumask */ + atomic_t active_cpus; + /* NPU NMMU context */ struct npu_context *npu_context; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index fb99c27bbf5e..2338abf6101a 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -96,6 +96,14 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, struct mm_struct *mm) { } #endif +#ifdef CONFIG_PPC_BOOK3S_64 +static inline void inc_mm_active_cpus(struct mm_struct *mm) +{ + atomic_inc(&mm->context.active_cpus); +} +#else +static inline void inc_mm_active_cpus(struct mm_struct *mm) { } +#endif /* * switch_mm is the entry point called from the architecture independent @@ -110,6 +118,7 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + inc_mm_active_cpus(next); /* * This full barrier orders the store to the cpumask above vs diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 609557569f65..a7eabff27a0f 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -69,13 +69,22 @@ static inline int mm_is_core_local(struct mm_struct *mm) topology_sibling_cpumask(smp_processor_id())); } +#ifdef CONFIG_PPC_BOOK3S_64 +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + if (atomic_read(&mm->context.active_cpus) > 1) + return false; + return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); +} +#else /* CONFIG_PPC_BOOK3S_64 */ static inline int mm_is_thread_local(struct mm_struct *mm) { return cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())); } +#endif /* !CONFIG_PPC_BOOK3S_64 */ -#else +#else /* CONFIG_SMP */ static inline int mm_is_core_local(struct mm_struct *mm) { return 1; -- cgit From 3a2df3798d4da2fc40052c25f0d9c687b1467d53 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:28:03 +1000 Subject: powerpc/mm: Make switch_mm_irqs_off() out of line It's too big to be inline, there is no reason to keep it that way. Signed-off-by: Benjamin Herrenschmidt [mpe: Rework to incorporate the comment changes via fixes branch] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 89 +--------------------------------- 1 file changed, 2 insertions(+), 87 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 2338abf6101a..309592589e30 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -77,93 +77,8 @@ extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); -#if defined(CONFIG_PPC32) -static inline void switch_mm_pgdir(struct task_struct *tsk, - struct mm_struct *mm) -{ - /* 32-bit keeps track of the current PGDIR in the thread struct */ - tsk->thread.pgdir = mm->pgd; -} -#elif defined(CONFIG_PPC_BOOK3E_64) -static inline void switch_mm_pgdir(struct task_struct *tsk, - struct mm_struct *mm) -{ - /* 64-bit Book3E keeps track of current PGD in the PACA */ - get_paca()->pgd = mm->pgd; -} -#else -static inline void switch_mm_pgdir(struct task_struct *tsk, - struct mm_struct *mm) { } -#endif - -#ifdef CONFIG_PPC_BOOK3S_64 -static inline void inc_mm_active_cpus(struct mm_struct *mm) -{ - atomic_inc(&mm->context.active_cpus); -} -#else -static inline void inc_mm_active_cpus(struct mm_struct *mm) { } -#endif - -/* - * switch_mm is the entry point called from the architecture independent - * code in kernel/sched/core.c - */ -static inline void switch_mm_irqs_off(struct mm_struct *prev, - struct mm_struct *next, - struct task_struct *tsk) -{ - bool new_on_cpu = false; - - /* Mark this context has been used on the new CPU */ - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - inc_mm_active_cpus(next); - - /* - * This full barrier orders the store to the cpumask above vs - * a subsequent operation which allows this CPU to begin loading - * translations for next. - * - * When using the radix MMU that operation is the load of the - * MMU context id, which is then moved to SPRN_PID. - * - * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or the store of paca->mm_ctx_id in - * copy_mm_to_paca(). - * - * On the read side the barrier is in pte_xchg(), which orders - * the store to the PTE vs the load of mm_cpumask. - */ - smp_mb(); - - new_on_cpu = true; - } - - /* Some subarchs need to track the PGD elsewhere */ - switch_mm_pgdir(tsk, next); - - /* Nothing else to do if we aren't actually switching */ - if (prev == next) - return; - - /* We must stop all altivec streams before changing the HW - * context - */ -#ifdef CONFIG_ALTIVEC - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile ("dssall"); -#endif /* CONFIG_ALTIVEC */ - - if (new_on_cpu) - radix_kvm_prefetch_workaround(next); - - /* - * The actual HW switching method differs between the various - * sub architectures. Out of line for now - */ - switch_mmu_context(prev, next, tsk); -} +extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk); static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) -- cgit From 94a04bc25a2c6296bd0c5e82c10e8231c2b11f77 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 25 Aug 2017 14:30:33 +1000 Subject: KVM: PPC: Book3S HV: POWER9 does not require secondary thread management POWER9 CPUs have independent MMU contexts per thread, so KVM does not need to quiesce secondary threads, so the hwthread_req/hwthread_state protocol does not have to be used. So patch it away on POWER9, and patch away the branch from the Linux idle wakeup to kvm_start_guest that is never used. Add a warning and error out of kvmppc_grab_hwthread in case it is ever called on POWER9. This avoids a hwsync in the idle wakeup path on POWER9. Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras [mpe: Use WARN(...) instead of WARN_ON()/pr_err(...)] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kvm_book3s_asm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 7cea76f11c26..83596f32f50b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -104,6 +104,10 @@ struct kvmppc_host_state { u8 napping; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * hwthread_req/hwthread_state pair is used to pull sibling threads + * out of guest on pre-ISAv3.0B CPUs where threads share MMU. + */ u8 hwthread_req; u8 hwthread_state; u8 host_ipi; -- cgit From aafc8a8300ec0f20b99c72e5a10e22535cb99dd1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 29 Aug 2017 21:36:35 +1000 Subject: powerpc/64s: Move IDLE_STATE_ENTER_SEQ[_NORET] into idle_book3s.S This macro is only used in idle_book3s.S, move it in there and add a more descriptive comment. Signed-off-by: Nicholas Piggin [mpe: Split out of larger patch and write change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 8a174cba5567..eb43b5c3a7b5 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -101,20 +101,4 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) #endif -/* Idle state entry routines */ -#ifdef CONFIG_PPC_P7_NAP -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ - ptesync; \ - ld r0,0(r1); \ -236: cmpd cr0,r0,r0; \ - bne 236b; \ - IDLE_INST; \ - -#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ - IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - b . -#endif /* CONFIG_PPC_P7_NAP */ - #endif -- cgit From 70412c55d419e971785094e9f7880fdbcd690520 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Aug 2017 14:27:19 +1000 Subject: powerpc/64: Fix watchdog configuration regressions This fixes a couple more bits of fallout from the new hard lockup watchdog patch. It restores the required hw_nmi_get_sample_period() function for the perf watchdog, and removes some function declarations on 64e that are only defined for 64s. This fixes the 64e build when the hardlockup detector is enabled. It restores the default behaviour of disabling the perf watchdog, and also fixes disabling the 64s watchdog when running as a guest. Fixes: 2104180a53 ("powerpc/64s: implement arch-specific hardlockup watchdog") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nmi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 6f8e79cd35d8..3760150a0ff0 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -1,9 +1,8 @@ #ifndef _ASM_NMI_H #define _ASM_NMI_H -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); - extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace -- cgit From a3b2cb30f252b21a6f962e0dd107c8b897ca65e4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 5 Jul 2017 13:56:25 +1000 Subject: powerpc: Do not call ppc_md.panic in fadump panic notifier If fadump is not registered, and no other crash or debug handlers are registered, the powerpc panic handler stops the guest before the generic panic code can push out debug information to the console. Currently, system reset injection causes the guest to silently stop. Stop calling ppc_md.panic in the panic notifier. crash_fadump already does rtas_os_term() to terminate the guest if fadump is registered. Remove ppc_md.panic. Move fadump panic notifier into fadump code. Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 1 - arch/powerpc/include/asm/setup.h | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index cd2fc1cc1cc7..73b92017b6d7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -76,7 +76,6 @@ struct machdep_calls { void __noreturn (*restart)(char *cmd); void __noreturn (*halt)(void); - void (*panic)(char *str); void (*cpu_die)(void); long (*time_init)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 654d64c9f3ac..3a3fb0ca68f5 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -23,7 +23,6 @@ extern void reloc_got2(unsigned long); void check_for_initrd(void); void initmem_init(void); -void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES -- cgit From b746e3e01e70d23ef53dcde1203ab78a1b7ac514 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 19 Jul 2017 16:59:10 +1000 Subject: powerpc/powernv: Flush console before platform error reboot Unrecovered MCE and HMI errors are sent through a special restart OPAL call to log the platform error. The downside is that they don't go through normal Linux crash paths, so they don't give much information to the Linux console. Change this by providing a special crash function which does some of the console flushing from the panic() path before calling firmware to reboot. The downside of this is a little more code to execute before reaching the firmware reboot. However in practice, it's critical to get the Linux console messages output in order to debug a problem. So this is a desirable tradeoff. Note on the implementation: It is difficult to plumb a custom reboot handler into the panic path, because panic does a little bit too much work. For example, it will try to delay with the timebase, but that may be corrupted in some cases resulting in a hang without reaching the platform reboot. Another problem is that panic can invoke the crash dump code which is not what we want in the case of a hardware platform error. Long-term the best solution will be to rework the panic path so it can be suitable for this kind of panic, but for now we just duplicate a bit of the code. Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 97ff192f5cfb..726c23304a57 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -50,7 +50,7 @@ int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day, uint32_t hour_min); int64_t opal_cec_power_down(uint64_t request); int64_t opal_cec_reboot(void); -int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag); +int64_t opal_cec_reboot2(uint32_t reboot_type, const char *diag); int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset); int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset); int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask); -- cgit From 6fcd6baa90aeec9dcbe30786e15c125bf50503b2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 19 Jul 2017 16:59:11 +1000 Subject: powerpc/powernv: Use kernel crash path for machine checks There are quite a few machine check exceptions that can be caused by kernel bugs. To make debugging easier, use the kernel crash path in cases of synchronous machine checks that occur in kernel mode, if that would not result in the machine going straight to panic or crash dump. There is a downside here that die()ing the process in kernel mode can still leave the system unstable. panic_on_oops will always force the system to fail-stop, so systems where that behaviour is important will still do the right thing. As a test, when triggering an i-side 0111b error (ifetch from foreign address) in kernel mode process context on POWER9, the kernel currently dies quickly like this: Severe Machine check interrupt [Not recovered] NIP [ffff000000000000]: 0xffff000000000000 Initiator: CPU Error type: Real address [Instruction fetch (foreign)] [ 127.426651616,0] OPAL: Reboot requested due to Platform error. Effective[ 127.426693712,3] OPAL: Reboot requested due to Platform error. address: ffff000000000000 opal: Reboot type 1 not supported Kernel panic - not syncing: PowerNV Unrecovered Machine Check CPU: 56 PID: 4425 Comm: syscall Tainted: G M 4.12.0-rc1-13857-ga4700a261072-dirty #35 Call Trace: [ 128.017988928,4] IPMI: BUG: Dropping ESEL on the floor due to buggy/mising code in OPAL for this BMC Rebooting in 10 seconds.. Trying to free IRQ 496 from IRQ context! After this patch, the process is killed and the kernel continues with this message, which gives enough information to identify the offending branch (i.e., with CFAR): Severe Machine check interrupt [Not recovered] NIP [ffff000000000000]: 0xffff000000000000 Initiator: CPU Error type: Real address [Instruction fetch (foreign)] Effective address: ffff000000000000 Oops: Machine check, sig: 7 [#1] SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 ... CPU: 22 PID: 4436 Comm: syscall Tainted: G M 4.12.0-rc1-13857-ga4700a261072-dirty #36 task: c000000932300000 task.stack: c000000932380000 NIP: ffff000000000000 LR: 00000000217706a4 CTR: ffff000000000000 REGS: c00000000fc8fd80 TRAP: 0200 Tainted: G M (4.12.0-rc1-13857-ga4700a261072-dirty) MSR: 90000000001c1003 CR: 24000484 XER: 20000000 CFAR: c000000000004c80 DAR: 0000000021770a90 DSISR: 0a000000 SOFTE: 1 GPR00: 0000000000001ebe 00007fffce4818b0 0000000021797f00 0000000000000000 GPR04: 00007fff8007ac24 0000000044000484 0000000000004000 00007fff801405e8 GPR08: 900000000280f033 0000000024000484 0000000000000000 0000000000000030 GPR12: 9000000000001003 00007fff801bc370 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR28: 00007fff801b0000 0000000000000000 00000000217707a0 00007fffce481918 NIP [ffff000000000000] 0xffff000000000000 LR [00000000217706a4] 0x217706a4 Call Trace: Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/include/asm/fadump.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 87fcc1948817..7ee763d3bea9 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,6 +133,7 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void die(const char *, struct pt_regs *, long); +extern bool die_will_crash(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index ce88bbe1d809..5a23010af600 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -209,11 +209,13 @@ extern int early_init_dt_scan_fw_dump(unsigned long node, extern int fadump_reserve_mem(void); extern int setup_fadump(void); extern int is_fadump_active(void); +extern int should_fadump_crash(void); extern void crash_fadump(struct pt_regs *, const char *); extern void fadump_cleanup(void); #else /* CONFIG_FA_DUMP */ static inline int is_fadump_active(void) { return 0; } +static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } #endif #endif -- cgit From 8bae6a23198defc5576e4c6f1f97822883d705ae Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 29 Aug 2017 17:34:00 +1000 Subject: powerpc/eeh: Reduce to one the number of places where edev is allocated arch/powerpc/kernel/eeh_dev.c:57 is the only legit place where edev is allocated; other 2 places allocate it on stack and in the heap for a very short period of time to use eeh_pe_get() as takes edev. This changes eeh_pe_get() to receive required parameters explicitly. This removes unnecessary temporary allocation of edev. This uses the "pe_no" name instead of the "pe_config_addr" name as it actually is a PE number and not a config space address as it seemed. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Andrew Donnellan Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 8e37b71674f4..26a6a43f8799 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -262,7 +262,8 @@ typedef void *(*eeh_traverse_func)(void *data, void *flag); void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); -struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); +struct eeh_pe *eeh_pe_get(struct pci_controller *phb, + int pe_no, int config_addr); int eeh_add_to_parent_pe(struct eeh_dev *edev); int eeh_rmv_from_parent_pe(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); -- cgit From 69672bd7489f8a995e9cb89655dc1dcee555dadb Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 29 Aug 2017 17:34:01 +1000 Subject: powerpc/eeh: Remove unnecessary pointer to phb from eeh_dev The eeh_dev struct already holds a pointer to pci_dn which it does not exist without and pci_dn itself holds the very same pointer so just use it. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 26a6a43f8799..777d37aa0a7f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -141,7 +141,6 @@ struct eeh_dev { struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ struct list_head rmv_list; /* Record the removed edevs */ - struct pci_controller *phb; /* Associated PHB */ struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ bool in_error; /* Error flag for edev */ -- cgit From 405b33a76d26e426276c088b38dc7efba7538ff2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 29 Aug 2017 17:34:02 +1000 Subject: powerpc/eeh: Remove unnecessary config_addr from eeh_dev The eeh_dev struct hold a config space address of an associated node and the very same address is also stored in the pci_dn struct which is always present during the eeh_dev lifetime. This uses bus:devfn directly from pci_dn instead of cached and packed config_addr. Since config_addr is made from device's bus:dev.fn, there is no point in keeping it in the debugfs either so remove that too. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 777d37aa0a7f..9847ae3a12d1 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -131,7 +131,6 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe) struct eeh_dev { int mode; /* EEH mode */ int class_code; /* Class code of the device */ - int config_addr; /* Config address */ int pe_config_addr; /* PE config address */ u32 config_space[16]; /* Saved PCI config space */ int pcix_cap; /* Saved PCIx capability */ -- cgit From f1e08232ede8d1888d51e94940645f93a8462d75 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 29 Aug 2017 17:34:04 +1000 Subject: powerpc/pci: Remove OF node back pointer from pci_dn The check_req() helper uses pci_get_pdn() to get an OF node pointer. pci_get_pdn() returns a pci_dn pointer which either: 1) from the OF node returned by pci_device_to_OF_node(); 2) from the parent child_list where entries don't have OF node pointers. Since check_req() does not care about 2), it can call pci_device_to_OF_node() directly, hence the change. The find_pe_dn() helper uses embedded pci_dn to get an OF node which is also stored in edev->pdev so let's take a shortcut and call pci_device_to_OF_node() directly. With these 2 changes, we can finally get rid of the OF node back pointer. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 56c67d3f0108..0b8aa1fe2d5f 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -195,7 +195,6 @@ struct pci_dn { struct pci_dn *parent; struct pci_controller *phb; /* for pci devices */ struct iommu_table_group *table_group; /* for phb's or bridges */ - struct device_node *node; /* back-pointer to the device_node */ int pci_ext_config_space; /* for pci devices */ -- cgit From 967689141eb37c4365eac0fac82d857773098475 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:31 -0700 Subject: powerpc/powernv/vas: Define macros, register fields and structures Define macros for the VAS hardware registers and bit-fields as well as couple of data structures needed by the VAS driver. Signed-off-by: Sukadev Bhattiprolu [mpe: Fixup include guard to use _ASM_POWERPC_VAS_H] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 arch/powerpc/include/asm/vas.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h new file mode 100644 index 000000000000..9f81db68f27b --- /dev/null +++ b/arch/powerpc/include/asm/vas.h @@ -0,0 +1,45 @@ +/* + * Copyright 2016-17 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_VAS_H +#define _ASM_POWERPC_VAS_H + +/* + * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25 + * (Local FIFO Size Register) of the VAS workbook. + */ +#define VAS_RX_FIFO_SIZE_MIN (1 << 10) /* 1KB */ +#define VAS_RX_FIFO_SIZE_MAX (8 << 20) /* 8MB */ + +/* + * Threshold Control Mode: Have paste operation fail if the number of + * requests in receive FIFO exceeds a threshold. + * + * NOTE: No special error code yet if paste is rejected because of these + * limits. So users can't distinguish between this and other errors. + */ +#define VAS_THRESH_DISABLED 0 +#define VAS_THRESH_FIFO_GT_HALF_FULL 1 +#define VAS_THRESH_FIFO_GT_QTR_FULL 2 +#define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 + +/* + * Co-processor Engine type. + */ +enum vas_cop_type { + VAS_COP_TYPE_FAULT, + VAS_COP_TYPE_842, + VAS_COP_TYPE_842_HIPRI, + VAS_COP_TYPE_GZIP, + VAS_COP_TYPE_GZIP_HIPRI, + VAS_COP_TYPE_FTW, + VAS_COP_TYPE_MAX, +}; + +#endif /* __ASM_POWERPC_VAS_H */ -- cgit From b6622a339e8670a1025d4dd84be473c76dabed33 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:32 -0700 Subject: powerpc/powernv: Move GET_FIELD/SET_FIELD to vas.h Move the GET_FIELD and SET_FIELD macros to vas.h as VAS and other users of VAS, including NX-842 can use those macros. There is a lot of related code between the VAS/NX kernel drivers and skiboot. For consistency, switch the order of parameters in SET_FIELD to match the order in skiboot. Signed-off-by: Sukadev Bhattiprolu Reviewed-by: Dan Streetman Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 9f81db68f27b..7dcf3bded343 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -29,6 +29,14 @@ #define VAS_THRESH_FIFO_GT_QTR_FULL 2 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 +/* + * Get/Set bit fields + */ +#define GET_FIELD(m, v) (((v) & (m)) >> MASK_LSH(m)) +#define MASK_LSH(m) (__builtin_ffsl(m) - 1) +#define SET_FIELD(m, v, val) \ + (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_LSH(m)) & (m))) + /* * Co-processor Engine type. */ -- cgit From 62c4eda4fabe89709ec43dcf1efe9fbea007a734 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:37 -0700 Subject: powerpc/powernv/vas: Define vas_rx_win_open() interface Define the vas_rx_win_open() interface. This interface is intended to be used by the Nest Accelerator (NX) driver(s) to setup receive windows for one or more NX engines (which implement compression & encryption algorithms in the hardware). Follow-on patches will provide an interface to close the window and to open a send window that kernel subsystems can use to access the NX engines. The interface to open a receive window is expected to be invoked for each instance of VAS in the system. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 7dcf3bded343..18c59c3ade3d 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -50,4 +50,49 @@ enum vas_cop_type { VAS_COP_TYPE_MAX, }; +/* + * Receive window attributes specified by the (in-kernel) owner of window. + */ +struct vas_rx_win_attr { + void *rx_fifo; + int rx_fifo_size; + int wcreds_max; + + bool pin_win; + bool rej_no_credit; + bool tx_wcred_mode; + bool rx_wcred_mode; + bool tx_win_ord_mode; + bool rx_win_ord_mode; + bool data_stamp; + bool nx_win; + bool fault_win; + bool user_win; + bool notify_disable; + bool intr_disable; + bool notify_early; + + int lnotify_lpid; + int lnotify_pid; + int lnotify_tid; + u32 pswid; + + int tc_mode; +}; + +/* + * Helper to initialize receive window attributes to defaults for an + * NX window. + */ +void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop); + +/* + * Open a VAS receive window for the instance of VAS identified by @vasid + * Use @attr to initialize the attributes of the window. + * + * Return a handle to the window or ERR_PTR() on error. + */ +struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, + struct vas_rx_win_attr *attr); + #endif /* __ASM_POWERPC_VAS_H */ -- cgit From 98271d4198699947d66d6f8a02c09bd27cb90022 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:38 -0700 Subject: powerpc/powernv/vas: Define vas_win_close() interface Define the vas_win_close() interface which should be used to close a send or receive windows. While the hardware configurations required to open send and receive windows differ, the configuration to close a window is the same for both. So we use a single interface to close the window. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 18c59c3ade3d..96bc24d61625 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -95,4 +95,11 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_rx_win_attr *attr); +/* + * Close the send or receive window identified by @win. For receive windows + * return -EAGAIN if there are active send windows attached to this receive + * window. + */ +int vas_win_close(struct vas_window *win); + #endif /* __ASM_POWERPC_VAS_H */ -- cgit From 5239af679a07427647b009ebb9c70b1a03ebca9b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:39 -0700 Subject: powerpc/powernv/vas: Define vas_tx_win_open() Define an interface to open a VAS send window. This interface is intended to be used the Nest Accelerator (NX) driver(s) to open a send window and use it to submit compression/encryption requests to a VAS receive window. The receive window, identified by the [vasid, cop] parameters, must already be open in VAS (i.e connected to an NX engine). Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vas.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 96bc24d61625..033461c17637 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -80,6 +80,29 @@ struct vas_rx_win_attr { int tc_mode; }; +/* + * Window attributes specified by the in-kernel owner of a send window. + */ +struct vas_tx_win_attr { + enum vas_cop_type cop; + int wcreds_max; + int lpid; + int pidr; /* hardware PID (from SPRN_PID) */ + int pid; /* linux process id */ + int pswid; + int rsvd_txbuf_count; + int tc_mode; + + bool user_win; + bool pin_win; + bool rej_no_credit; + bool rsvd_txbuf_enable; + bool tx_wcred_mode; + bool rx_wcred_mode; + bool tx_win_ord_mode; + bool rx_win_ord_mode; +}; + /* * Helper to initialize receive window attributes to defaults for an * NX window. @@ -95,6 +118,25 @@ void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_rx_win_attr *attr); +/* + * Helper to initialize send window attributes to defaults for an NX window. + */ +extern void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, + enum vas_cop_type cop); + +/* + * Open a VAS send window for the instance of VAS identified by @vasid + * and the co-processor type @cop. Use @attr to initialize attributes + * of the window. + * + * Note: The instance of VAS must already have an open receive window for + * the coprocessor type @cop. + * + * Return a handle to the send window or ERR_PTR() on error. + */ +struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, + struct vas_tx_win_attr *attr); + /* * Close the send or receive window identified by @win. For receive windows * return -EAGAIN if there are active send windows attached to this receive -- cgit From 2392c8c8c0450293625dbef19ff5e206fb7b6749 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 28 Aug 2017 23:23:40 -0700 Subject: powerpc/powernv/vas: Define copy/paste interfaces Define interfaces (wrappers) to the 'copy' and 'paste' instructions (which are new in PowerISA 3.0). These are intended to be used to by NX driver(s) to submit Coprocessor Request Blocks (CRBs) to the NX hardware engines. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/include/asm/vas.h | 12 ++++++++++++ 2 files changed, 14 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 041ba15aa2b9..b5e1b51b8ba9 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -418,6 +418,8 @@ ___PPC_RB(b)) #define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \ ___PPC_RB(b)) +#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \ + ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ __PPC_RA(a) | __PPC_RS(s)) #define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \ diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 033461c17637..fd5963acd658 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -144,4 +144,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, */ int vas_win_close(struct vas_window *win); +/* + * Copy the co-processor request block (CRB) @crb into the local L2 cache. + */ +int vas_copy_crb(void *crb, int offset); + +/* + * Paste a previously copied CRB (see vas_copy_crb()) from the L2 cache to + * the hardware address associated with the window @win. @re is expected/ + * assumed to be true for NX windows. + */ +int vas_paste_crb(struct vas_window *win, int offset, bool re); + #endif /* __ASM_POWERPC_VAS_H */ -- cgit From d1e1b351f50f9e5941f436f6c63949731979e00c Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 30 Aug 2017 21:45:09 +1000 Subject: powerpc/xmon: Add ISA v3.0 SPRs to SPR dump Add support for printing the PIDR/TIDR for ISA 300 and PSSCR and PTCR in ISA 3.0 hypervisor mode. SPRN_PSSCR_PR is the privileged mode access and is used when we are not in hypervisor mode. Signed-off-by: Balbir Singh [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c36823d64ec9..2c4366ada976 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -356,6 +356,7 @@ #define SPRN_PMSR 0x355 /* Power Management Status Reg */ #define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */ +#define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */ #define SPRN_PMCR 0x374 /* Power Management Control Register */ /* HFSCR and FSCR bit numbers are the same */ -- cgit From eb039161da2ff388cc30d076badd8e06fb015f33 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 9 Mar 2017 16:42:12 +1100 Subject: powerpc/asm: Convert .llong directives to .8byte .llong is an undocumented PPC specific directive. The generic equivalent is .quad, but even better (because it's self describing) is .8byte. Convert all .llong directives to .8byte. Signed-off-by: Tobin C. Harding Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-compat.h | 2 +- arch/powerpc/include/asm/feature-fixups.h | 6 +++--- arch/powerpc/include/asm/reg.h | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index cee3aa087653..7f2a7702596c 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -25,7 +25,7 @@ #define PPC_LCMPI stringify_in_c(cmpdi) #define PPC_LCMPLI stringify_in_c(cmpldi) #define PPC_LCMP stringify_in_c(cmpd) -#define PPC_LONG stringify_in_c(.llong) +#define PPC_LONG stringify_in_c(.8byte) #define PPC_LONG_ALIGN stringify_in_c(.balign 8) #define PPC_TLNEI stringify_in_c(tdnei) #define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 2de2319b99e2..8f88f771cc55 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -19,11 +19,11 @@ */ #if defined(CONFIG_PPC64) && !defined(__powerpc64__) /* 64 bits kernel, 32 bits code (ie. vdso32) */ -#define FTR_ENTRY_LONG .llong +#define FTR_ENTRY_LONG .8byte #define FTR_ENTRY_OFFSET .long 0xffffffff; .long #elif defined(CONFIG_PPC64) -#define FTR_ENTRY_LONG .llong -#define FTR_ENTRY_OFFSET .llong +#define FTR_ENTRY_LONG .8byte +#define FTR_ENTRY_OFFSET .8byte #else #define FTR_ENTRY_LONG .long #define FTR_ENTRY_OFFSET .long diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 2c4366ada976..f92eaf7a4c0d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1344,12 +1344,12 @@ static inline void msr_check_and_clear(unsigned long bits) ".section __ftr_fixup,\"a\"\n" \ ".align 3\n" \ "98:\n" \ - " .llong %1\n" \ - " .llong %1\n" \ - " .llong 97b-98b\n" \ - " .llong 99b-98b\n" \ - " .llong 0\n" \ - " .llong 0\n" \ + " .8byte %1\n" \ + " .8byte %1\n" \ + " .8byte 97b-98b\n" \ + " .8byte 99b-98b\n" \ + " .8byte 0\n" \ + " .8byte 0\n" \ ".previous" \ : "=r" (rval) \ : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ -- cgit From 2a636a56d2d39676fe85190dec102c7440e24977 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Thu, 29 Jun 2017 17:12:55 +1000 Subject: powerpc/smp: Add cpu_l2_cache_map We want to add an extra level to the CPU scheduler topology to account for cores which share a cache. To do this we need to build a cpumask for each CPU that indicates which CPUs share this cache to use as an input to the scheduler. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/smp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 8ea98504f900..fac963e10d39 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -97,6 +97,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) #endif DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); static inline struct cpumask *cpu_sibling_mask(int cpu) @@ -109,6 +110,11 @@ static inline struct cpumask *cpu_core_mask(int cpu) return per_cpu(cpu_core_map, cpu); } +static inline struct cpumask *cpu_l2_cache_mask(int cpu) +{ + return per_cpu(cpu_l2_cache_map, cpu); +} + extern int cpu_to_core_id(int cpu); /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. -- cgit From 93b2d3cf3733b4060d3623161551f51ea1ab5499 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:24 +1000 Subject: powerpc: Correct instruction code for xxlor instruction The instruction code for xxlor that commit 0016a4cf5582 ("powerpc: Emulate most Book I instructions in emulate_step()", 2010-06-15) added is actually the code for xxlnor. It is used in get_vsr() and put_vsr() and the effect of the error is that if emulate_step is used to emulate a VSX load or store from any register other than vsr0, the bitwise complement of the correct value will be loaded or stored. This corrects the error. Fixes: 0016a4cf5582 ("powerpc: Emulate most Book I instructions in emulate_step()") Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index b5e1b51b8ba9..3dd8a6e43bb2 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -262,7 +262,7 @@ #define PPC_INST_TLBSRX_DOT 0x7c0006a5 #define PPC_INST_VPMSUMW 0x10000488 #define PPC_INST_VPMSUMD 0x100004c8 -#define PPC_INST_XXLOR 0xf0000510 +#define PPC_INST_XXLOR 0xf0000490 #define PPC_INST_XXSWAPD 0xf0000250 #define PPC_INST_XVCPSGNDP 0xf0000780 #define PPC_INST_TRECHKPT 0x7c0007dd -- cgit From 3cdfcbfd32b9d1c0d4a6fa80ee9c390927aab948 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:25 +1000 Subject: powerpc: Change analyse_instr so it doesn't modify *regs The analyse_instr function currently doesn't just work out what an instruction does, it also executes those instructions whose effect is only to update CPU registers that are stored in struct pt_regs. This is undesirable because optprobes uses analyse_instr to work out if an instruction could be successfully emulated in future. This changes analyse_instr so it doesn't modify *regs; instead it stores information in the instruction_op structure to indicate what registers (GPRs, CR, XER, LR) would be set and what value they would be set to. A companion function called emulate_update_regs() can then use that information to update a pt_regs struct appropriately. As a minor cleanup, this replaces inline asm using the cntlzw and cntlzd instructions with calls to __builtin_clz() and __builtin_clzl(). Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 52 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index d3a42cc45a82..442e6363eb5a 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -23,9 +23,6 @@ struct pt_regs; #define IS_RFID(instr) (((instr) & 0xfc0007fe) == 0x4c000024) #define IS_RFI(instr) (((instr) & 0xfc0007fe) == 0x4c000064) -/* Emulate instructions that cause a transfer of control. */ -extern int emulate_step(struct pt_regs *regs, unsigned int instr); - enum instruction_type { COMPUTE, /* arith/logical/CR op, etc. */ LOAD, @@ -55,11 +52,29 @@ enum instruction_type { #define INSTR_TYPE_MASK 0x1f +/* Compute flags, ORed in with type */ +#define SETREG 0x20 +#define SETCC 0x40 +#define SETXER 0x80 + +/* Branch flags, ORed in with type */ +#define SETLK 0x20 +#define BRTAKEN 0x40 +#define DECCTR 0x80 + /* Load/store flags, ORed in with type */ #define SIGNEXT 0x20 #define UPDATE 0x40 /* matches bit in opcode 31 instructions */ #define BYTEREV 0x80 +/* Barrier type field, ORed in with type */ +#define BARRIER_MASK 0xe0 +#define BARRIER_SYNC 0x00 +#define BARRIER_ISYNC 0x20 +#define BARRIER_EIEIO 0x40 +#define BARRIER_LWSYNC 0x60 +#define BARRIER_PTESYNC 0x80 + /* Cacheop values, ORed in with type */ #define CACHEOP_MASK 0x700 #define DCBST 0 @@ -83,7 +98,36 @@ struct instruction_op { int update_reg; /* For MFSPR */ int spr; + u32 ccval; + u32 xerval; }; -extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs, +/* + * Decode an instruction, and return information about it in *op + * without changing *regs. + * + * Return value is 1 if the instruction can be emulated just by + * updating *regs with the information in *op, -1 if we need the + * GPRs but *regs doesn't contain the full register set, or 0 + * otherwise. + */ +extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, unsigned int instr); + +/* + * Emulate an instruction that can be executed just by updating + * fields in *regs. + */ +void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); + +/* + * Emulate instructions that cause a transfer of control, + * arithmetic/logical instructions, loads and stores, + * cache operations and barriers. + * + * Returns 1 if the instruction was emulated successfully, + * 0 if it could not be emulated, or -1 for an instruction that + * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.). + */ +extern int emulate_step(struct pt_regs *regs, unsigned int instr); + -- cgit From 350779a29f11f80ac66a8b38a7718ad30f003f18 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:27 +1000 Subject: powerpc: Handle most loads and stores in instruction emulation code This extends the instruction emulation infrastructure in sstep.c to handle all the load and store instructions defined in the Power ISA v3.0, except for the atomic memory operations, ldmx (which was never implemented), lfdp/stfdp, and the vector element load/stores. The instructions added are: Integer loads and stores: lbarx, lharx, lqarx, stbcx., sthcx., stqcx., lq, stq. VSX loads and stores: lxsiwzx, lxsiwax, stxsiwx, lxvx, lxvl, lxvll, lxvdsx, lxvwsx, stxvx, stxvl, stxvll, lxsspx, lxsdx, stxsspx, stxsdx, lxvw4x, lxsibzx, lxvh8x, lxsihzx, lxvb16x, stxvw4x, stxsibx, stxvh8x, stxsihx, stxvb16x, lxsd, lxssp, lxv, stxsd, stxssp, stxv. These instructions are handled both in the analyse_instr phase and in the emulate_step phase. The code for lxvd2ux and stxvd2ux has been taken out, as those instructions were never implemented in any processor and have been taken out of the architecture, and their opcodes have been reused for other instructions in POWER9 (lxvb16x and stxvb16x). The emulation for the VSX loads and stores uses helper functions which don't access registers or memory directly, which can hopefully be reused by KVM later. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 8 ++++++++ arch/powerpc/include/asm/sstep.h | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 3dd8a6e43bb2..ce0930d68857 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -205,6 +205,8 @@ #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 #define PPC_INST_STDCX 0x7c0001ad +#define PPC_INST_LQARX 0x7c000228 +#define PPC_INST_STQCX 0x7c00016d #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000028 @@ -403,12 +405,18 @@ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \ + ___PPC_RT(t) | ___PPC_RA(a) | \ + ___PPC_RB(b) | __PPC_EH(eh)) #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \ + ___PPC_RT(t) | ___PPC_RA(a) | \ + ___PPC_RB(b)) #define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ ___PPC_RB(b)) #define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 442e6363eb5a..980197024c0b 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -83,6 +83,12 @@ enum instruction_type { #define DCBT 0x300 #define ICBI 0x400 +/* VSX flags values */ +#define VSX_FPCONV 1 /* do floating point SP/DP conversion */ +#define VSX_SPLAT 2 /* store loaded value into all elements */ +#define VSX_LDLEFT 4 /* load VSX register from left */ +#define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */ + /* Size field in type word */ #define SIZE(n) ((n) << 8) #define GETSIZE(w) ((w) >> 8) @@ -100,6 +106,17 @@ struct instruction_op { int spr; u32 ccval; u32 xerval; + u8 element_size; /* for VSX/VMX loads/stores */ + u8 vsx_flags; +}; + +union vsx_reg { + u8 b[16]; + u16 h[8]; + u32 w[4]; + unsigned long d[2]; + float fp[4]; + double dp[2]; }; /* @@ -131,3 +148,7 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); */ extern int emulate_step(struct pt_regs *regs, unsigned int instr); +extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, + const void *mem); +extern void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, + void *mem); -- cgit From d120cdbce68c3739f94f733bec376460fb9cbc14 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:28 +1000 Subject: powerpc/64: Fix update forms of loads and stores to write 64-bit EA When a 64-bit processor is executing in 32-bit mode, the update forms of load and store instructions are required by the architecture to write the full 64-bit effective address into the RA register, though only the bottom 32 bits are used to address memory. Currently, the instruction emulation code writes the truncated address to the RA register. This fixes it by keeping the full 64-bit EA in the instruction_op structure, truncating the address in emulate_step() where it is used to address memory, rather than in the address computations in analyse_instr(). Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 980197024c0b..4fcc2c9a6ed5 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -25,7 +25,7 @@ struct pt_regs; enum instruction_type { COMPUTE, /* arith/logical/CR op, etc. */ - LOAD, + LOAD, /* load and store types need to be contiguous */ LOAD_MULTI, LOAD_FP, LOAD_VMX, @@ -52,6 +52,8 @@ enum instruction_type { #define INSTR_TYPE_MASK 0x1f +#define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX) + /* Compute flags, ORed in with type */ #define SETREG 0x20 #define SETCC 0x40 -- cgit From c22435a5f3d8f85ea162ae523a6ba60a58521ba5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:33 +1000 Subject: powerpc: Emulate FP/vector/VSX loads/stores correctly when regs not live At present, the analyse_instr/emulate_step code checks for the relevant MSR_FP/VEC/VSX bit being set when a FP/VMX/VSX load or store is decoded, but doesn't recheck the bit before reading or writing the relevant FP/VMX/VSX register in emulate_step(). Since we don't have preemption disabled, it is possible that we get preempted between checking the MSR bit and doing the register access. If that happened, then the registers would have been saved to the thread_struct for the current process. Accesses to the CPU registers would then potentially read stale values, or write values that would never be seen by the user process. Another way that the registers can become non-live is if a page fault occurs when accessing user memory, and the page fault code calls a copy routine that wants to use the VMX or VSX registers. To fix this, the code for all the FP/VMX/VSX loads gets restructured so that it forms an image in a local variable of the desired register contents, then disables preemption, checks the MSR bit and either sets the CPU register or writes the value to the thread struct. Similarly, the code for stores checks the MSR bit, copies either the CPU register or the thread struct to a local variable, then reenables preemption and then copies the register image to memory. If the instruction being emulated is in the kernel, then we must not use the register values in the thread_struct. In this case, if the relevant MSR enable bit is not set, then emulate_step refuses to emulate the instruction. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 4fcc2c9a6ed5..474a99255689 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -119,6 +119,7 @@ union vsx_reg { unsigned long d[2]; float fp[4]; double dp[2]; + __vector128 v; }; /* -- cgit From b2543f7b20bb2a551ed340447d7303f0ce4644f1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:36 +1000 Subject: powerpc: Emulate the dcbz instruction This adds code to analyse_instr() and emulate_step() to understand the dcbz (data cache block zero) instruction. The emulate_dcbz() function is made public so it can be used by the alignment handler in future. (The apparently unnecessary cropping of the address to 32 bits is there because it will be needed in that situation.) Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 474a99255689..793639a5aa5e 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -84,6 +84,7 @@ enum instruction_type { #define DCBTST 0x200 #define DCBT 0x300 #define ICBI 0x400 +#define DCBZ 0x500 /* VSX flags values */ #define VSX_FPCONV 1 /* do floating point SP/DP conversion */ @@ -155,3 +156,4 @@ extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, const void *mem); extern void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, void *mem); +extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs); -- cgit From d955189ae42796621fb439e5e778ccaeebc2a1e7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:38 +1000 Subject: powerpc: Handle opposite-endian processes in emulation code This adds code to the load and store emulation code to byte-swap the data appropriately when the process being emulated is set to the opposite endianness to that of the kernel. This also enables the emulation for the multiple-register loads and stores (lmw, stmw, lswi, stswi, lswx, stswx) to work for little-endian. In little-endian mode, the partial word at the end of a transfer for lsw*/stsw* (when the byte count is not a multiple of 4) is loaded/stored at the least-significant end of the register. Additionally, this fixes a bug in the previous code in that it could call read_mem/write_mem with a byte count that was not 1, 2, 4 or 8. Note that this only works correctly on processors with "true" little-endian mode, such as IBM POWER processors from POWER6 on, not the so-called "PowerPC" little-endian mode that uses address swizzling as implemented on the old 32-bit 603, 604, 740/750, 74xx CPUs. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 793639a5aa5e..958c2c55bcfe 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -153,7 +153,8 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); extern int emulate_step(struct pt_regs *regs, unsigned int instr); extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem); -extern void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, - void *mem); + const void *mem, bool cross_endian); +extern void emulate_vsx_store(struct instruction_op *op, + const union vsx_reg *reg, void *mem, + bool cross_endian); extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs); -- cgit From a53d5182e24c22986ad0e99e52f8fe343ee7d7ac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:39 +1000 Subject: powerpc: Separate out load/store emulation into its own function This moves the parts of emulate_step() that deal with emulating load and store instructions into a new function called emulate_loadstore(). This is to make it possible to reuse this code in the alignment handler. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 958c2c55bcfe..309d1c5de143 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -152,6 +152,15 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); */ extern int emulate_step(struct pt_regs *regs, unsigned int instr); +/* + * Emulate a load or store instruction by reading/writing the + * memory of the current process. FP/VMX/VSX registers are assumed + * to hold live values if the appropriate enable bit in regs->msr is + * set; otherwise this will use the saved values in the thread struct + * for user-mode accesses. + */ +extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op); + extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, const void *mem, bool cross_endian); extern void emulate_vsx_store(struct instruction_op *op, -- cgit From d2b65ac6526a82965212b632d42687251e122a36 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 16:34:09 +1000 Subject: powerpc: Emulate load/store floating point as integer word instructions This adds emulation for the lfiwax, lfiwzx and stfiwx instructions. This necessitated adding a new flag to indicate whether a floating point or an integer conversion was needed for LOAD_FP and STORE_FP, so this moves the size field in op->type up 4 bits. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 309d1c5de143..ab9d849644d0 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -68,6 +68,7 @@ enum instruction_type { #define SIGNEXT 0x20 #define UPDATE 0x40 /* matches bit in opcode 31 instructions */ #define BYTEREV 0x80 +#define FPCONV 0x100 /* Barrier type field, ORed in with type */ #define BARRIER_MASK 0xe0 @@ -93,8 +94,8 @@ enum instruction_type { #define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */ /* Size field in type word */ -#define SIZE(n) ((n) << 8) -#define GETSIZE(w) ((w) >> 8) +#define SIZE(n) ((n) << 12) +#define GETSIZE(w) ((w) >> 12) #define MKOP(t, f, s) ((t) | (f) | SIZE(s)) -- cgit From da74f659205ea08cb0fd0b3050637b0e0eb31520 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 23 Aug 2017 16:54:32 +0200 Subject: powerpc/32: add memset16() Commit 694fc88ce271f ("powerpc/string: Implement optimized memset variants") added memset16(), memset32() and memset64() for the 64 bits PPC. On 32 bits, memset64() is not relevant, and as shown below, the generic version of memset32() gives a good code, so only memset16() is candidate for an optimised version. 000009c0 : 9c0: 2c 05 00 00 cmpwi r5,0 9c4: 39 23 ff fc addi r9,r3,-4 9c8: 4d 82 00 20 beqlr 9cc: 7c a9 03 a6 mtctr r5 9d0: 94 89 00 04 stwu r4,4(r9) 9d4: 42 00 ff fc bdnz 9d0 9d8: 4e 80 00 20 blr The last part of memset() handling the not 4-bytes multiples operates on bytes, making it unsuitable for handling word without modification. As it would increase memset() complexity, it is better to implement memset16() from scratch. In addition it has the advantage of allowing a more optimised memset16() than what we would have by using the memset() function. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/string.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index b0e82466d4e8..cc9addefb51c 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -10,6 +10,7 @@ #define __HAVE_ARCH_MEMMOVE #define __HAVE_ARCH_MEMCMP #define __HAVE_ARCH_MEMCHR +#define __HAVE_ARCH_MEMSET16 extern char * strcpy(char *,const char *); extern char * strncpy(char *,const char *, __kernel_size_t); @@ -24,7 +25,6 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #ifdef CONFIG_PPC64 -#define __HAVE_ARCH_MEMSET16 #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 @@ -46,6 +46,8 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) { return __memset64(p, v, n * 8); } +#else +extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); #endif #endif /* __KERNEL__ */ -- cgit From 146e9f1b65478643f2729a97ccb8be60bb4492e5 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Thu, 31 Aug 2017 00:18:18 -0700 Subject: crypto/nx: Add P9 NX specific error codes for 842 engine This patch adds changes for checking P9 specific 842 engine error codes. These errros are reported in coprocessor status block (CSB) for failures. Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/icswx.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h index 27e588f6c72e..6a2c87577541 100644 --- a/arch/powerpc/include/asm/icswx.h +++ b/arch/powerpc/include/asm/icswx.h @@ -69,7 +69,10 @@ struct coprocessor_completion_block { #define CSB_CC_WR_PROTECTION (16) #define CSB_CC_UNKNOWN_CODE (17) #define CSB_CC_ABORT (18) +#define CSB_CC_EXCEED_BYTE_COUNT (19) /* P9 or later */ #define CSB_CC_TRANSPORT (20) +#define CSB_CC_INVALID_CRB (21) /* P9 or later */ +#define CSB_CC_INVALID_DDE (30) /* P9 or later */ #define CSB_CC_SEGMENTED_DDL (31) #define CSB_CC_PROGRESS_POINT (32) #define CSB_CC_DDE_OVERFLOW (33) -- cgit From eac1e731b59ee3b5f5e641a7765c7ed41ed26226 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 30 Aug 2017 21:46:11 +0200 Subject: powerpc/xive: guest exploitation of the XIVE interrupt controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the framework for using XIVE in a PowerVM guest. The support is very similar to the native one in a much simpler form. Each source is associated with an Event State Buffer (ESB). This is a two bit state machine which is used to trigger events. The bits are named "P" (pending) and "Q" (queued) and can be controlled by MMIO. The Guest OS registers event (or notifications) queues on which the HW will post event data for a target to notify. Instead of OPAL calls, a set of Hypervisors call are used to configure the interrupt sources and the event/notification queues of the guest: - H_INT_GET_SOURCE_INFO used to obtain the address of the MMIO page of the Event State Buffer (PQ bits) entry associated with the source. - H_INT_SET_SOURCE_CONFIG assigns a source to a "target". - H_INT_GET_SOURCE_CONFIG determines to which "target" and "priority" is assigned to a source - H_INT_GET_QUEUE_INFO returns the address of the notification management page associated with the specified "target" and "priority". - H_INT_SET_QUEUE_CONFIG sets or resets the event queue for a given "target" and "priority". It is also used to set the notification config associated with the queue, only unconditional notification for the moment. Reset is performed with a queue size of 0 and queueing is disabled in that case. - H_INT_GET_QUEUE_CONFIG returns the queue settings for a given "target" and "priority". - H_INT_RESET resets all of the partition's interrupt exploitation structures to their initial state, losing all configuration set via the hcalls H_INT_SET_SOURCE_CONFIG and H_INT_SET_QUEUE_CONFIG. - H_INT_SYNC issue a synchronisation on a source to make sure sure all notifications have reached their queue. As for XICS, the XIVE interface for the guest is described in the device tree under the "interrupt-controller" node. A couple of new properties are specific to XIVE : - "reg" contains the base address and size of the thread interrupt managnement areas (TIMA), also called rings, for the User level and for the Guest OS level. Only the Guest OS level is taken into account today. - "ibm,xive-eq-sizes" the size of the event queues. One cell per size supported, contains log2 of size, in ascending order. - "ibm,xive-lisn-ranges" the interrupt numbers ranges assigned to the guest. These are allocated using a simple bitmap. and also : - "/ibm,plat-res-int-priorities" contains a list of priorities that the hypervisor has reserved for its own use. Tested with a QEMU XIVE model for pseries and with the Power hypervisor. Signed-off-by: Cédric Le Goater Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 13 ++++++++++++- arch/powerpc/include/asm/xive.h | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 57d38b504ff7..3d34dc0869f6 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -280,7 +280,18 @@ #define H_RESIZE_HPT_COMMIT 0x370 #define H_REGISTER_PROC_TBL 0x37C #define H_SIGNAL_SYS_RESET 0x380 -#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET +#define H_INT_GET_SOURCE_INFO 0x3A8 +#define H_INT_SET_SOURCE_CONFIG 0x3AC +#define H_INT_GET_SOURCE_CONFIG 0x3B0 +#define H_INT_GET_QUEUE_INFO 0x3B4 +#define H_INT_SET_QUEUE_CONFIG 0x3B8 +#define H_INT_GET_QUEUE_CONFIG 0x3BC +#define H_INT_SET_OS_REPORTING_LINE 0x3C0 +#define H_INT_GET_OS_REPORTING_LINE 0x3C4 +#define H_INT_ESB 0x3C8 +#define H_INT_SYNC 0x3CC +#define H_INT_RESET 0x3D0 +#define MAX_HCALL_OPCODE H_INT_RESET /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index c23ff4389ca2..473f133a8555 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -110,11 +110,13 @@ extern bool __xive_enabled; static inline bool xive_enabled(void) { return __xive_enabled; } +extern bool xive_spapr_init(void); extern bool xive_native_init(void); extern void xive_smp_probe(void); extern int xive_smp_prepare_cpu(unsigned int cpu); extern void xive_smp_setup_cpu(void); extern void xive_smp_disable_cpu(void); +extern void xive_teardown_cpu(void); extern void xive_kexec_teardown_cpu(int secondary); extern void xive_shutdown(void); extern void xive_flush_interrupt(void); @@ -147,6 +149,7 @@ extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) static inline bool xive_enabled(void) { return false; } +static inline bool xive_spapr_init(void) { return false; } static inline bool xive_native_init(void) { return false; } static inline void xive_smp_probe(void) { } extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } -- cgit From c58a14a9ccf0a79bbdafc106a95c080340c00f49 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 30 Aug 2017 21:46:14 +0200 Subject: powerpc/xive: add the HW IRQ number under xive_irq_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be required later by the H_INT_ESB hcall. Signed-off-by: Cédric Le Goater Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xive.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 473f133a8555..64ec9bbcf03e 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -45,6 +45,7 @@ struct xive_irq_data { void __iomem *trig_mmio; u32 esb_shift; int src_chip; + u32 hw_irq; /* Setup/used by frontend */ int target; -- cgit From bed81ee181dd6b21171cffbb80472cc5b774c24d Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 30 Aug 2017 21:46:15 +0200 Subject: powerpc/xive: introduce H_INT_ESB hcall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The H_INT_ESB hcall() is used to issue a load or store to the ESB page instead of using the MMIO pages. This can be used as a workaround on some HW issues. The OS knows that this hcall should be used on an interrupt source when the ESB hcall flag is set to 1 in the hcall H_INT_GET_SOURCE_INFO. To maintain the frontier between the xive frontend and backend, we introduce a new xive operation 'esb_rw' to be used in the routines doing memory accesses on the ESBs. Signed-off-by: Cédric Le Goater Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xive.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 64ec9bbcf03e..371fbebf1ec9 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -56,6 +56,7 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 #define XIVE_IRQ_FLAG_MASK_FW 0x08 #define XIVE_IRQ_FLAG_EOI_FW 0x10 +#define XIVE_IRQ_FLAG_H_INT_ESB 0x20 #define XIVE_INVALID_CHIP_ID -1 -- cgit From ac5e5a5402d64acd48af3287718e24ff8ba9fa21 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 30 Aug 2017 21:46:16 +0200 Subject: powerpc/xive: add XIVE Exploitation Mode to CAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On POWER9, the Client Architecture Support (CAS) negotiation process determines whether the guest operates in XIVE Legacy compatibility or in XIVE exploitation mode. Now that we have initial guest support for the XIVE interrupt controller, let's inform the hypervisor what we can do. The platform advertises the XIVE Exploitation Mode support using the property "ibm,arch-vec-5-platform-support-vec-5", byte 23 bits 0-1 : - 0b00 XIVE legacy mode Only - 0b01 XIVE exploitation mode Only - 0b10 XIVE legacy or exploitation mode The OS asks for XIVE Exploitation Mode support using the property "ibm,architecture-vec-5", byte 23 bits 0-1: - 0b00 XIVE legacy mode Only - 0b01 XIVE exploitation mode Only Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 35c00d7a0cf8..825bd5998701 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -159,7 +159,10 @@ struct of_drconf_cell { #define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */ #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ -#define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ +#define OV5_XIVE_SUPPORT 0x17C0 /* XIVE Exploitation Support Mask */ +#define OV5_XIVE_LEGACY 0x1700 /* XIVE legacy mode Only */ +#define OV5_XIVE_EXPLOIT 0x1740 /* XIVE exploitation mode Only */ +#define OV5_XIVE_EITHER 0x1780 /* XIVE legacy or exploitation mode */ /* MMU Base Architecture */ #define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ #define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ -- cgit