summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/pgtable-radix.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/pgtable-radix.c')
-rw-r--r--arch/powerpc/mm/pgtable-radix.c175
1 files changed, 161 insertions, 14 deletions
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index c28165d8970b..39c252b54d16 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -8,9 +8,15 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#define pr_fmt(fmt) "radix-mmu: " fmt
+
+#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
+#include <linux/mm.h>
+#include <linux/string_helpers.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -19,15 +25,24 @@
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
+#include <asm/sections.h>
+#include <asm/trace.h>
#include <trace/events/thp.h>
+unsigned int mmu_pid_bits;
+unsigned int mmu_base_pid;
+
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
unsigned long table_size)
{
- unsigned long patb1 = base | table_size | PATB_GR;
+ unsigned long patb0, patb1;
+
+ patb0 = be64_to_cpu(partition_tb[0].patb0);
+ patb1 = base | table_size | PATB_GR;
+
+ mmu_partition_table_set_entry(0, patb0, patb1);
- partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
@@ -108,20 +123,92 @@ set_the_pte:
return 0;
}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void radix__change_memory_range(unsigned long start, unsigned long end,
+ unsigned long clear)
+{
+ unsigned long idx;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = PAGE_ALIGN(end); // aligns up
+
+ pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
+ start, end, clear);
+
+ for (idx = start; idx < end; idx += PAGE_SIZE) {
+ pgdp = pgd_offset_k(idx);
+ pudp = pud_alloc(&init_mm, pgdp, idx);
+ if (!pudp)
+ continue;
+ if (pud_huge(*pudp)) {
+ ptep = (pte_t *)pudp;
+ goto update_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, idx);
+ if (!pmdp)
+ continue;
+ if (pmd_huge(*pmdp)) {
+ ptep = pmdp_ptep(pmdp);
+ goto update_the_pte;
+ }
+ ptep = pte_alloc_kernel(pmdp, idx);
+ if (!ptep)
+ continue;
+update_the_pte:
+ radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
+ }
+
+ radix__flush_tlb_kernel_range(start, end);
+}
+
+void radix__mark_rodata_ro(void)
+{
+ unsigned long start, end;
+
+ start = (unsigned long)_stext;
+ end = (unsigned long)__init_begin;
+
+ radix__change_memory_range(start, end, _PAGE_WRITE);
+}
+
+void radix__mark_initmem_nx(void)
+{
+ unsigned long start = (unsigned long)__init_begin;
+ unsigned long end = (unsigned long)__init_end;
+
+ radix__change_memory_range(start, end, _PAGE_EXEC);
+}
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+
static inline void __meminit print_mapping(unsigned long start,
unsigned long end,
unsigned long size)
{
+ char buf[10];
+
if (end <= start)
return;
- pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size);
+ string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
+
+ pr_info("Mapped 0x%016lx-0x%016lx with %s pages\n", start, end, buf);
}
static int __meminit create_physical_mapping(unsigned long start,
unsigned long end)
{
- unsigned long addr, mapping_size = 0;
+ unsigned long vaddr, addr, mapping_size = 0;
+ pgprot_t prot;
+ unsigned long max_mapping_size;
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ int split_text_mapping = 1;
+#else
+ int split_text_mapping = 0;
+#endif
start = _ALIGN_UP(start, PAGE_SIZE);
for (addr = start; addr < end; addr += mapping_size) {
@@ -130,9 +217,12 @@ static int __meminit create_physical_mapping(unsigned long start,
gap = end - addr;
previous_size = mapping_size;
+ max_mapping_size = PUD_SIZE;
+retry:
if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
- mmu_psize_defs[MMU_PAGE_1G].shift)
+ mmu_psize_defs[MMU_PAGE_1G].shift &&
+ PUD_SIZE <= max_mapping_size)
mapping_size = PUD_SIZE;
else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
mmu_psize_defs[MMU_PAGE_2M].shift)
@@ -140,13 +230,32 @@ static int __meminit create_physical_mapping(unsigned long start,
else
mapping_size = PAGE_SIZE;
+ if (split_text_mapping && (mapping_size == PUD_SIZE) &&
+ (addr <= __pa_symbol(__init_begin)) &&
+ (addr + mapping_size) >= __pa_symbol(_stext)) {
+ max_mapping_size = PMD_SIZE;
+ goto retry;
+ }
+
+ if (split_text_mapping && (mapping_size == PMD_SIZE) &&
+ (addr <= __pa_symbol(__init_begin)) &&
+ (addr + mapping_size) >= __pa_symbol(_stext))
+ mapping_size = PAGE_SIZE;
+
if (mapping_size != previous_size) {
print_mapping(start, addr, previous_size);
start = addr;
}
- rc = radix__map_kernel_page((unsigned long)__va(addr), addr,
- PAGE_KERNEL_X, mapping_size);
+ vaddr = (unsigned long)__va(addr);
+
+ if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
+ overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size))
+ prot = PAGE_KERNEL_X;
+ else
+ prot = PAGE_KERNEL;
+
+ rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
if (rc)
return rc;
}
@@ -168,11 +277,34 @@ static void __init radix_init_pgtable(void)
for_each_memblock(memory, reg)
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size));
+
+ /* Find out how many PID bits are supported */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 20;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * When KVM is possible, we only use the top half of the
+ * PID space to avoid collisions between host and guest PIDs
+ * which can cause problems due to prefetch when exiting the
+ * guest with AIL=3
+ */
+ mmu_base_pid = 1 << (mmu_pid_bits - 1);
+#else
+ mmu_base_pid = 1;
+#endif
+ } else {
+ /* The guest uses the bottom half of the PID space */
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 19;
+ mmu_base_pid = 1;
+ }
+
/*
* Allocate Partition table and process table for the
* host.
*/
- BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
+ BUG_ON(PRTB_SIZE_SHIFT > 36);
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
/*
* Fill in the process table.
@@ -190,6 +322,7 @@ static void __init radix_init_pgtable(void)
asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
}
static void __init radix_init_partition_table(void)
@@ -245,6 +378,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
+ /* Find MMU PID size */
+ prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
+ if (prop && size == 4)
+ mmu_pid_bits = be32_to_cpup(prop);
+
+ /* Grab page size encodings */
prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
if (!prop)
return 0;
@@ -316,6 +455,9 @@ static void update_hid_for_radix(void)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ trace_tlbie(0, 0, rb, 0, 2, 0, 1);
+ trace_tlbie(0, 0, rb, 0, 2, 1, 1);
+
/*
* now switch the HID
*/
@@ -397,6 +539,7 @@ void __init radix__early_init_mmu(void)
__kernel_virt_size = RADIX_KERN_VIRT_SIZE;
__vmalloc_start = RADIX_VMALLOC_START;
__vmalloc_end = RADIX_VMALLOC_END;
+ __kernel_io_start = RADIX_KERN_IO_START;
vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
@@ -683,7 +826,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add
unsigned long old;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!radix__pmd_trans_huge(*pmdp));
+ WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
@@ -701,14 +844,18 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_devmap(*pmdp));
/*
* khugepaged calls this for normal pmd
*/
pmd = *pmdp;
pmd_clear(pmdp);
+
/*FIXME!! Verify whether we need this kick below */
- kick_all_cpus_sync();
- flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ serialize_against_pte_lookup(vma->vm_mm);
+
+ radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
+
return pmd;
}
@@ -767,16 +914,16 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
old_pmd = __pmd(old);
/*
- * Serialize against find_linux_pte_or_hugepte which does lock-less
+ * Serialize against find_current_mm_pte which does lock-less
* lookup in page tables with local interrupts disabled. For huge pages
* it casts pmd_t to pte_t. Since format of pte_t is different from
* pmd_t we want to prevent transit from pmd pointing to page table
* to pmd pointing to huge page (and back) while interrupts are disabled.
* We clear pmd to possibly replace it with page table pointer in
* different code paths. So make sure we wait for the parallel
- * find_linux_pte_or_hugepage to finish.
+ * find_current_mm_pte to finish.
*/
- kick_all_cpus_sync();
+ serialize_against_pte_lookup(mm);
return old_pmd;
}