summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- arch/powerpc/include/asm/book3s/64/mmu.h | 15
-rw-r--r-- arch/powerpc/include/asm/mmu_context.h | 18
-rw-r--r-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 59
-rw-r--r-- arch/powerpc/mm/mmu_context_book3s64.c | 5
-rw-r--r-- arch/powerpc/mm/pgtable-radix.c | 34
-rw-r--r-- arch/powerpc/mm/tlb-radix.c | 45
6 files changed, 154 insertions, 22 deletions
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 77529a3e3811..5b4023c616f7 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb;
#define PRTS_MASK 0x1f /* process table size field */
#define PRTB_MASK 0x0ffffffffffff000UL
-/*
- * Limit process table to PAGE_SIZE table. This
- * also limit the max pid we can support.
- * MAX_USER_CONTEXT * 16 bytes of space.
- */
-#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4)
-#define PRTB_ENTRIES (1ul << CONTEXT_BITS)
+/* Number of supported PID bits */
+extern unsigned int mmu_pid_bits;
+
+/* Base PID to allocate from */
+extern unsigned int mmu_base_pid;
+
+#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
+#define PRTB_ENTRIES (1ul << mmu_pid_bits)
/*
* Power9 currently only support 64K partition table size.
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index da7e9432fa8f..0c76675394c5 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -45,7 +45,7 @@ extern void set_context(unsigned long id, pgd_t *pgd);
#ifdef CONFIG_PPC_BOOK3S_64
extern void radix__switch_mmu_context(struct mm_struct *prev,
- struct mm_struct *next);
+ struct mm_struct *next);
static inline void switch_mmu_context(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
@@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
+#else /* empty inline stub so callers need no #ifdef of their own */
+static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE && CONFIG_PPC_RADIX_MMU */
+
extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
@@ -79,9 +85,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
{
+ bool new_on_cpu = false;
+
/* Mark this context has been used on the new CPU */
- if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next)))
+ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ new_on_cpu = true;
+ }
/* 32-bit keeps track of the current PGDIR in the thread struct */
#ifdef CONFIG_PPC32
@@ -109,6 +119,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
#endif /* CONFIG_ALTIVEC */
+
+ if (new_on_cpu)
+ radix_kvm_prefetch_workaround(next);
+
/*
* The actual HW switching method differs between the various
* sub architectures. Out of line for now
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index cb44065e2946..c52184a8efdf 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1443,12 +1443,14 @@ mc_cont:
ori r6,r6,1
mtspr SPRN_CTRLT,r6
4:
- /* Read the guest SLB and save it away */
+ /* Check if we are running hash or radix and store it in cr2 */
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
- cmpwi r0, 0
+ cmpwi cr2,r0,0
+
+ /* Read the guest SLB and save it away */
li r5, 0
- bne 3f /* for radix, save 0 entries */
+ bne cr2, 3f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
@@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96)
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22:
- /* Clear out SLB */
- li r5,0
- slbmte r5,r5
- slbia
- ptesync
/* Restore host values of some registers */
BEGIN_FTR_SECTION
@@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION
mtspr SPRN_PID, r7
mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+#ifdef CONFIG_PPC_RADIX_MMU
+ /*
+ * Are we running hash or radix ?
+ */
+ beq cr2,3f
+
+ /* Radix: Handle the case where the guest used an illegal PID */
+ LOAD_REG_ADDR(r4, mmu_base_pid)
+ lwz r3, VCPU_GUEST_PID(r9)
+ lwz r5, 0(r4)
+ cmpw cr0,r3,r5
+ blt 2f
+
+ /*
+ * Illegal PID, the HW might have prefetched and cached in the TLB
+ * some translations for the LPID 0 / guest PID combination which
+ * Linux doesn't know about, so we need to flush that PID out of
+ * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
+ * the right context.
+ */
+ li r0,0
+ mtspr SPRN_LPID,r0
+ isync
+
+ /* Then do a congruence class local flush */
+ ld r6,VCPU_KVM(r9)
+ lwz r0,KVM_TLB_SETS(r6)
+ mtctr r0
+ li r7,0x400 /* IS field = 0b01 */
+ ptesync
+ sldi r0,r3,32 /* RS has PID */
+1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
+ addi r7,r7,0x1000
+ bdnz 1b
+ ptesync
+
+2: /* Flush the ERAT on radix P9 DD1 guest exit */
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
+ b 4f
+#endif /* CONFIG_PPC_RADIX_MMU */
+ /* Hash: clear out SLB */
+3: li r5,0
+ slbmte r5,r5
+ slbia
+ ptesync
+4:
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index abed1fe6992f..a75f63833284 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -126,9 +126,10 @@ static int hash__init_new_context(struct mm_struct *mm)
static int radix__init_new_context(struct mm_struct *mm)
{
unsigned long rts_field;
- int index;
+ int index, max_id;
- index = alloc_context_id(1, PRTB_ENTRIES - 1);
+ max_id = (1 << mmu_pid_bits) - 1;
+ index = alloc_context_id(mmu_base_pid, max_id);
if (index < 0)
return index;
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 5cc50d47ce3f..671a45d86c18 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -25,6 +25,9 @@
#include <trace/events/thp.h>
+unsigned int mmu_pid_bits;	/* number of supported PID bits */
+unsigned int mmu_base_pid;	/* base PID to allocate from */
+
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
unsigned long table_size)
{
@@ -261,11 +264,34 @@ static void __init radix_init_pgtable(void)
for_each_memblock(memory, reg)
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size));
+
+ /* Find out how many PID bits are supported */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 20;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * When KVM is possible, we only use the top half of the
+ * PID space to avoid collisions between host and guest PIDs
+ * which can cause problems due to prefetch when exiting the
+ * guest with AIL=3
+ */
+ mmu_base_pid = 1 << (mmu_pid_bits - 1);
+#else
+ mmu_base_pid = 1;
+#endif
+ } else {
+ /* The guest uses the bottom half of the PID space */
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 19;
+ mmu_base_pid = 1;
+ }
+
/*
* Allocate Partition table and process table for the
* host.
*/
- BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
+ BUG_ON(PRTB_SIZE_SHIFT > 36);
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
/*
* Fill in the process table.
@@ -339,6 +365,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
+ /* Find MMU PID size */
+ prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
+ if (prop && size == 4)
+ mmu_pid_bits = be32_to_cpup(prop);
+
+ /* Grab page size encodings */
prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
if (!prop)
return 0;
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 744e0164ecf5..16ae1bbe13f0 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -12,12 +12,12 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
-#include <asm/ppc-opcode.h>
+#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
-
+#include <asm/cputhreads.h>
#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
@@ -454,3 +454,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
else
radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * Flush the PID of @mm from the TLB when a sibling thread may be in KVM.
+ *
+ * @mm: the address space being switched to on the current CPU.
+ *
+ * Returns without doing anything when @mm has no context id, when the
+ * CPU lacks CPU_FTR_HVMODE, or when radix is not enabled. Otherwise the
+ * other threads of this core are scanned and, as soon as one of them has
+ * a vcpu recorded in its paca, the PID is flushed with
+ * _tlbiel_pid(pid, RIC_FLUSH_ALL).
+ */
+void radix_kvm_prefetch_workaround(struct mm_struct *mm)
+{
+	unsigned int pid = mm->context.id;
+	int cpu, sib, last;
+
+	if (unlikely(pid == MMU_NO_CONTEXT))
+		return;
+
+	/*
+	 * If this context hasn't run on this CPU before and KVM is
+	 * around, there's a slim chance that the guest on another
+	 * CPU just brought an obsolete translation into the TLB of
+	 * this CPU due to a bad prefetch using the guest PID on
+	 * the way into the hypervisor.
+	 *
+	 * We work around this here. If KVM is possible, we check if
+	 * any sibling thread is in KVM. If it is, the window may exist
+	 * and thus we flush that PID from the core.
+	 *
+	 * A potential future improvement would be to mark which PIDs
+	 * have never been used on the system and avoid it if the PID
+	 * is new and the process has no other cpumask bit set.
+	 */
+	if (!cpu_has_feature(CPU_FTR_HVMODE) || !radix_enabled())
+		return;
+
+	cpu = smp_processor_id();
+	/* The last sibling depends only on cpu, so compute it once. */
+	last = cpu_last_thread_sibling(cpu);
+
+	for (sib = cpu_first_thread_sibling(cpu); sib <= last; sib++) {
+		if (sib == cpu)
+			continue;
+		/* One sibling with a vcpu is enough: flush once and stop. */
+		if (paca[sib].kvm_hstate.kvm_vcpu) {
+			_tlbiel_pid(pid, RIC_FLUSH_ALL);
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */