diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2021-06-30 15:27:32 -0300 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2021-06-30 15:27:32 -0300 |
commit | 857286e4c5ae5d2e860fd15d4628e707b434d7e5 (patch) | |
tree | 520ea5916f50fb2a4289d8d70438d559c6808b01 /arch/powerpc/mm | |
parent | 51f382428c17f172f430f9be8de4246b8f15f97c (diff) | |
parent | 007b350a58754a93ca9fe50c498cc27780171153 (diff) |
Merge remote-tracking branch 'torvalds/master' into perf/core
To pick up fixes.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/radix_pgtable.c | 33 | ||||
-rw-r--r-- | arch/powerpc/mm/book3s64/radix_tlb.c | 202 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context.c | 4 |
5 files changed, 181 insertions, 64 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index c3df3a8501d4..2ffcf540f08b 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -13,7 +13,7 @@ obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ -obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 5fef8db3b463..2176a5f70746 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/sched/mm.h> #include <linux/memblock.h> +#include <linux/of.h> #include <linux/of_fdt.h> #include <linux/mm.h> #include <linux/hugetlb.h> @@ -357,30 +358,19 @@ static void __init radix_init_pgtable(void) } /* Find out how many PID bits are supported */ - if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { - if (!mmu_pid_bits) - mmu_pid_bits = 20; - mmu_base_pid = 1; - } else if (cpu_has_feature(CPU_FTR_HVMODE)) { - if (!mmu_pid_bits) - mmu_pid_bits = 20; -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + if (!cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { /* - * When KVM is possible, we only use the top half of the - * PID space to avoid collisions between host and guest PIDs - * which can cause problems due to prefetch when exiting the - * guest with AIL=3 + * Older versions of KVM on these machines perfer if the + * guest only uses the low 19 PID bits. */ - mmu_base_pid = 1 << (mmu_pid_bits - 1); -#else - mmu_base_pid = 1; -#endif - } else { - /* The guest uses the bottom half of the PID space */ if (!mmu_pid_bits) mmu_pid_bits = 19; - mmu_base_pid = 1; + } else { + if (!mmu_pid_bits) + mmu_pid_bits = 20; } + mmu_base_pid = 1; /* * Allocate Partition table and process table for the @@ -486,6 +476,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, def = &mmu_psize_defs[idx]; def->shift = shift; def->ap = ap; + def->h_rpt_pgsize = psize_to_rpti_pgsize(idx); } /* needed ? */ @@ -560,9 +551,13 @@ void __init radix__early_init_devtree(void) */ mmu_psize_defs[MMU_PAGE_4K].shift = 12; mmu_psize_defs[MMU_PAGE_4K].ap = 0x0; + mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize = + psize_to_rpti_pgsize(MMU_PAGE_4K); mmu_psize_defs[MMU_PAGE_64K].shift = 16; mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; + mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize = + psize_to_rpti_pgsize(MMU_PAGE_64K); } /* diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 409e61210789..318ec4f33661 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -20,10 +20,6 @@ #include "internal.h" -#define RIC_FLUSH_TLB 0 -#define RIC_FLUSH_PWC 1 -#define RIC_FLUSH_ALL 2 - /* * tlbiel instruction for radix, set invalidation * i.e., r=1 and is=01 or is=10 or is=11 @@ -130,6 +126,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static __always_inline void __tlbie_pid_lpid(unsigned long pid, + unsigned long lpid, + unsigned long ric) +{ + unsigned long rb, rs, prs, r; + + rb = PPC_BIT(53); /* IS = 1 */ + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -190,6 +201,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, + unsigned long lpid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb, rs, prs, r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long ap, unsigned long ric) { @@ -235,6 +263,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, } } +static inline void fixup_tlbie_va_range_lpid(unsigned long va, + unsigned long pid, + unsigned long lpid, + unsigned long ap) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); + } +} + static inline void fixup_tlbie_pid(unsigned long pid) { /* @@ -254,6 +298,25 @@ static inline void fixup_tlbie_pid(unsigned long pid) } } +static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) +{ + /* + * We can use any address for the invalidation, pick one which is + * probably unused as an optimisation. + */ + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), + RIC_FLUSH_TLB); + } +} static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long ap) @@ -344,6 +407,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, + unsigned long ric) +{ + asm volatile("ptesync" : : : "memory"); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); + fixup_tlbie_pid_lpid(pid, lpid); + break; + case RIC_FLUSH_PWC: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); + fixup_tlbie_pid_lpid(pid, lpid); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} struct tlbiel_pid { unsigned long pid; unsigned long ric; @@ -469,6 +557,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end, fixup_tlbie_va_range(addr - page_size, pid, ap); } +static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, + unsigned long pid, unsigned long lpid, + unsigned long page_size, + unsigned long psize) +{ + unsigned long addr; + unsigned long ap = mmu_get_ap(psize); + + for (addr = start; addr < end; addr += page_size) + __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); + + fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); +} + static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, unsigned long psize, unsigned long ric) { @@ -549,6 +651,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, + unsigned long pid, unsigned long lpid, + unsigned long page_size, + unsigned long psize, bool also_pwc) +{ + asm volatile("ptesync" : : : "memory"); + if (also_pwc) + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); + __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} + static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, @@ -1338,47 +1452,57 @@ void radix__flush_tlb_all(void) } #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) +/* + * Performs process-scoped invalidations for a given LPID + * as part of H_RPT_INVALIDATE hcall. + */ +void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, + unsigned long type, unsigned long pg_sizes, + unsigned long start, unsigned long end) { - unsigned long pid = mm->context.id; + unsigned long psize, nr_pages; + struct mmu_psize_def *def; + bool flush_pid; - if (unlikely(pid == MMU_NO_CONTEXT)) + /* + * A H_RPTI_TYPE_ALL request implies RIC=3, hence + * do a single IS=1 based flush. + */ + if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); return; + } - if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) - return; + if (type & H_RPTI_TYPE_PWC) + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); - /* - * If this context hasn't run on that CPU before and KVM is - * around, there's a slim chance that the guest on another - * CPU just brought in obsolete translation into the TLB of - * this CPU due to a bad prefetch using the guest PID on - * the way into the hypervisor. - * - * We work around this here. If KVM is possible, we check if - * any sibling thread is in KVM. If it is, the window may exist - * and thus we flush that PID from the core. - * - * A potential future improvement would be to mark which PIDs - * have never been used on the system and avoid it if the PID - * is new and the process has no other cpumask bit set. - */ - if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) { - int cpu = smp_processor_id(); - int sib = cpu_first_thread_sibling(cpu); - bool flush = false; - - for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { - if (sib == cpu) - continue; - if (!cpu_possible(sib)) - continue; - if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu) - flush = true; + /* Full PID flush */ + if (start == 0 && end == -1) + return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); + + /* Do range invalidation for all the valid page sizes */ + for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { + def = &mmu_psize_defs[psize]; + if (!(pg_sizes & def->h_rpt_pgsize)) + continue; + + nr_pages = (end - start) >> def->shift; + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + + /* + * If the number of pages spanning the range is above + * the ceiling, convert the request into a full PID flush. + * And since PID flush takes out all the page sizes, there + * is no need to consider remaining page sizes. + */ + if (flush_pid) { + _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); + return; } - if (flush) - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbie_va_range_lpid(start, end, pid, lpid, + (1UL << def->shift), psize, false); } } -EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround); +EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index a6b36a40897a..c5e520c6f13b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,7 +127,7 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, } #endif -#ifndef CONFIG_NEED_MULTIPLE_NODES +#ifndef CONFIG_NUMA void __init mem_topology_setup(void) { max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; @@ -162,7 +162,7 @@ static int __init mark_nonram_nosave(void) return 0; } -#else /* CONFIG_NEED_MULTIPLE_NODES */ +#else /* CONFIG_NUMA */ static int __init mark_nonram_nosave(void) { return 0; diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index a857af401738..74246536b832 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (cpu_has_feature(CPU_FTR_ALTIVEC)) asm volatile ("dssall"); - if (new_on_cpu) - radix_kvm_prefetch_workaround(next); - else + if (!new_on_cpu) membarrier_arch_switch_mm(prev, next, tsk); /* |