summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile7
-rw-r--r--arch/powerpc/mm/dump_hashpagetable.c2
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c10
-rw-r--r--arch/powerpc/mm/hash_utils_64.c1
-rw-r--r--arch/powerpc/mm/highmem.c1
-rw-r--r--arch/powerpc/mm/hugetlbpage-book3e.c1
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c1
-rw-r--r--arch/powerpc/mm/hugetlbpage-radix.c21
-rw-r--r--arch/powerpc/mm/hugetlbpage.c1
-rw-r--r--arch/powerpc/mm/init_64.c21
-rw-r--r--arch/powerpc/mm/mmap.c49
-rw-r--r--arch/powerpc/mm/mmu_context.c9
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c35
-rw-r--r--arch/powerpc/mm/numa.c74
-rw-r--r--arch/powerpc/mm/pgtable-radix.c10
-rw-r--r--arch/powerpc/mm/pgtable_32.c2
-rw-r--r--arch/powerpc/mm/pgtable_64.c4
-rw-r--r--arch/powerpc/mm/slb_low.S6
-rw-r--r--arch/powerpc/mm/slice.c62
-rw-r--r--arch/powerpc/mm/tlb-radix.c345
-rw-r--r--arch/powerpc/mm/vphn.c1
-rw-r--r--arch/powerpc/mm/vphn.h1
22 files changed, 444 insertions, 220 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index fb844d2f266e..76a6b057d454 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux ppc-specific parts of the memory manager.
#
@@ -14,11 +15,11 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
-obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
-ifeq ($(CONFIG_PPC_STD_MMU_64),y)
+ifeq ($(CONFIG_PPC_BOOK3S_64),y)
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
endif
@@ -31,7 +32,7 @@ obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-y += hugetlbpage.o
ifeq ($(CONFIG_HUGETLB_PAGE),y)
-obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o
obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
endif
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index 5c4c93dcff19..14cfb11b09d0 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -500,7 +500,7 @@ static void populate_markers(void)
address_markers[6].start_address = PHB_IO_END;
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
address_markers[9].start_address = H_VMEMMAP_BASE;
#else
address_markers[9].start_address = VMEMMAP_BASE;
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index c9282d27b203..c2e7dea59490 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -112,7 +112,7 @@ struct flag_info {
static const struct flag_info flag_array[] = {
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_PRIVILEGED,
.val = 0,
#else
@@ -147,7 +147,7 @@ static const struct flag_info flag_array[] = {
.set = "present",
.clear = " ",
}, {
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
.mask = H_PAGE_HASHPTE,
.val = H_PAGE_HASHPTE,
#else
@@ -157,7 +157,7 @@ static const struct flag_info flag_array[] = {
.set = "hpte",
.clear = " ",
}, {
-#ifndef CONFIG_PPC_STD_MMU_64
+#ifndef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_GUARDED,
.val = _PAGE_GUARDED,
.set = "guarded",
@@ -174,7 +174,7 @@ static const struct flag_info flag_array[] = {
.set = "accessed",
.clear = " ",
}, {
-#ifndef CONFIG_PPC_STD_MMU_64
+#ifndef CONFIG_PPC_BOOK3S_64
.mask = _PAGE_WRITETHRU,
.val = _PAGE_WRITETHRU,
.set = "write through",
@@ -450,7 +450,7 @@ static void populate_markers(void)
address_markers[i++].start_address = PHB_IO_END;
address_markers[i++].start_address = IOREMAP_BASE;
address_markers[i++].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
address_markers[i++].start_address = H_VMEMMAP_BASE;
#else
address_markers[i++].start_address = VMEMMAP_BASE;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 67ec2e927253..655a5a9a183d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -21,6 +21,7 @@
#undef DEBUG
#undef DEBUG_LOW
+#define pr_fmt(fmt) "hash-mmu: " fmt
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/sched/mm.h>
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
index e292c8a60952..668e87d03f9e 100644
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* highmem.c: virtual kernel memory mappings for high memory
*
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index bfe4e8526b2d..f84ec46cdb26 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* PPC Huge TLB Page Support for Book3E MMU
*
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index a84bb44497f9..0c2a91df3210 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
*
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index a12e86395025..2486bee0f93e 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <asm/pgtable.h>
@@ -48,17 +49,22 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
+ int fixed = (flags & MAP_FIXED);
+ unsigned long high_limit;
struct vm_unmapped_area_info info;
- if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
- mm->context.addr_limit = TASK_SIZE;
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
if (len & ~huge_page_mask(h))
return -EINVAL;
- if (len > mm->task_size)
+ if (len > high_limit)
return -ENOMEM;
- if (flags & MAP_FIXED) {
+ if (fixed) {
+ if (addr > high_limit - len)
+ return -ENOMEM;
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
return addr;
@@ -67,7 +73,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
- if (mm->task_size - len >= addr &&
+ if (high_limit - len >= addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@@ -78,12 +84,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
- info.high_limit = current->mm->mmap_base;
+ info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
- if (addr > DEFAULT_MAP_WINDOW)
- info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
-
return vm_unmapped_area(&info);
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1571a498a33f..a9b9083c5e49 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -433,6 +433,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
pud = pud_offset(pgd, start);
pgd_clear(pgd);
pud_free_tlb(tlb, pud, start);
+ mm_dec_nr_puds(tlb->mm);
}
/*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 588a521966ec..a07722531b32 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -68,11 +68,11 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#if H_PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
@@ -367,11 +367,20 @@ EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-#ifdef CONFIG_PPC_STD_MMU_64
-static bool disable_radix;
+#ifdef CONFIG_PPC_BOOK3S_64
+static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+
static int __init parse_disable_radix(char *p)
{
- disable_radix = true;
+ bool val;
+
+ if (strlen(p) == 0)
+ val = true;
+ else if (kstrtobool(p, &val))
+ return -EINVAL;
+
+ disable_radix = val;
+
return 0;
}
early_param("disable_radix", parse_disable_radix);
@@ -444,4 +453,4 @@ void __init mmu_early_init_devtree(void)
else
hash__early_init_devtree();
}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 5d78b193fec4..d503f344e476 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -106,22 +106,27 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+ int fixed = (flags & MAP_FIXED);
+ unsigned long high_limit;
struct vm_unmapped_area_info info;
- if (unlikely(addr > mm->context.addr_limit &&
- mm->context.addr_limit != TASK_SIZE))
- mm->context.addr_limit = TASK_SIZE;
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
- if (len > mm->task_size - mmap_min_addr)
+ if (len > high_limit)
return -ENOMEM;
- if (flags & MAP_FIXED)
+ if (fixed) {
+ if (addr > high_limit - len)
+ return -ENOMEM;
return addr;
+ }
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
- if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
+ if (high_limit - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@@ -129,13 +134,9 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
+ info.high_limit = high_limit;
info.align_mask = 0;
- if (unlikely(addr > DEFAULT_MAP_WINDOW))
- info.high_limit = mm->context.addr_limit;
- else
- info.high_limit = DEFAULT_MAP_WINDOW;
-
return vm_unmapped_area(&info);
}
@@ -149,37 +150,37 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
+ int fixed = (flags & MAP_FIXED);
+ unsigned long high_limit;
struct vm_unmapped_area_info info;
- if (unlikely(addr > mm->context.addr_limit &&
- mm->context.addr_limit != TASK_SIZE))
- mm->context.addr_limit = TASK_SIZE;
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
- /* requested length too big for entire address space */
- if (len > mm->task_size - mmap_min_addr)
+ if (len > high_limit)
return -ENOMEM;
- if (flags & MAP_FIXED)
+ if (fixed) {
+ if (addr > high_limit - len)
+ return -ENOMEM;
return addr;
+ }
- /* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
- if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ if (high_limit - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = mm->mmap_base;
+ info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
info.align_mask = 0;
- if (addr > DEFAULT_MAP_WINDOW)
- info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
-
addr = vm_unmapped_area(&info);
if (!(addr & ~PAGE_MASK))
return addr;
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0f613bc63c50..d60a62bf4fc7 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -34,15 +34,6 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
struct mm_struct *mm) { }
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
-static inline void inc_mm_active_cpus(struct mm_struct *mm)
-{
- atomic_inc(&mm->context.active_cpus);
-}
-#else
-static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
-#endif
-
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 05e15386d4cb..59c0766ae4e0 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -93,11 +93,11 @@ static int hash__init_new_context(struct mm_struct *mm)
return index;
/*
- * We do switch_slb() early in fork, even before we setup the
- * mm->context.addr_limit. Default to max task size so that we copy the
- * default values to paca which will help us to handle slb miss early.
+ * In the case of exec, use the default limit,
+ * otherwise inherit it from the mm we are duplicating.
*/
- mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
+ if (!mm->context.slb_addr_limit)
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
/*
* The old code would re-promote on fork, we don't do that when using
@@ -200,7 +200,7 @@ static void destroy_pagetable_page(struct mm_struct *mm)
/* We allow PTE_FRAG_NR fragments from a PTE page */
if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
+ free_unref_page(page);
}
}
@@ -216,19 +216,34 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_SPAPR_TCE_IOMMU
WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
+ if (radix_enabled())
+ WARN_ON(process_tb[mm->context.id].prtb0 != 0);
+ else
+ subpage_prot_free(mm);
+ destroy_pagetable_page(mm);
+ __destroy_context(mm->context.id);
+ mm->context.id = MMU_NO_CONTEXT;
+}
+
+void arch_exit_mmap(struct mm_struct *mm)
+{
if (radix_enabled()) {
/*
* Radix doesn't have a valid bit in the process table
* entries. However we know that at least P9 implementation
* will avoid caching an entry with an invalid RTS field,
* and 0 is invalid. So this will do.
+ *
+ * This runs before the "fullmm" tlb flush in exit_mmap,
+ * which does a RIC=2 tlbie to clear the process table
+ * entry. See the "fullmm" comments in tlb-radix.c.
+ *
+ * No barrier required here after the store because
+ * this process will do the invalidate, which starts with
+ * ptesync.
*/
process_tb[mm->context.id].prtb0 = 0;
- } else
- subpage_prot_free(mm);
- destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
- mm->context.id = MMU_NO_CONTEXT;
+ }
}
#ifdef CONFIG_PPC_RADIX_MMU
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b95c584ce19d..adb6364f4091 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1148,11 +1148,33 @@ struct topology_update_data {
int new_nid;
};
+#define TOPOLOGY_DEF_TIMER_SECS 60
+
static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
static cpumask_t cpu_associativity_changes_mask;
static int vphn_enabled;
static int prrn_enabled;
static void reset_topology_timer(void);
+static int topology_timer_secs = 1;
+static int topology_inited;
+static int topology_update_needed;
+
+/*
+ * Change polling interval for associativity changes.
+ */
+int timed_topology_update(int nsecs)
+{
+ if (vphn_enabled) {
+ if (nsecs > 0)
+ topology_timer_secs = nsecs;
+ else
+ topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
+
+ reset_topology_timer();
+ }
+
+ return 0;
+}
/*
* Store the current values of the associativity change counters in the
@@ -1246,6 +1268,11 @@ static long vphn_get_associativity(unsigned long cpu,
"hcall_vphn() experienced a hardware fault "
"preventing VPHN. Disabling polling...\n");
stop_topology_update();
+ break;
+ case H_SUCCESS:
+ dbg("VPHN hcall succeeded. Reset polling...\n");
+ timed_topology_update(0);
+ break;
}
return rc;
@@ -1323,8 +1350,11 @@ int numa_update_cpu_topology(bool cpus_locked)
struct device *dev;
int weight, new_nid, i = 0;
- if (!prrn_enabled && !vphn_enabled)
+ if (!prrn_enabled && !vphn_enabled) {
+ if (!topology_inited)
+ topology_update_needed = 1;
return 0;
+ }
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
@@ -1363,22 +1393,30 @@ int numa_update_cpu_topology(bool cpus_locked)
cpumask_andnot(&cpu_associativity_changes_mask,
&cpu_associativity_changes_mask,
cpu_sibling_mask(cpu));
+ dbg("Assoc chg gives same node %d for cpu%d\n",
+ new_nid, cpu);
cpu = cpu_last_thread_sibling(cpu);
continue;
}
for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
ud = &updates[i++];
+ ud->next = &updates[i];
ud->cpu = sibling;
ud->new_nid = new_nid;
ud->old_nid = numa_cpu_lookup_table[sibling];
cpumask_set_cpu(sibling, &updated_cpus);
- if (i < weight)
- ud->next = &updates[i];
}
cpu = cpu_last_thread_sibling(cpu);
}
+ /*
+ * Prevent processing of 'updates' from overflowing array
+ * where last entry filled in a 'next' pointer.
+ */
+ if (i)
+ updates[i-1].next = NULL;
+
pr_debug("Topology update for the following CPUs:\n");
if (cpumask_weight(&updated_cpus)) {
for (ud = &updates[0]; ud; ud = ud->next) {
@@ -1433,12 +1471,12 @@ int numa_update_cpu_topology(bool cpus_locked)
out:
kfree(updates);
+ topology_update_needed = 0;
return changed;
}
int arch_update_cpu_topology(void)
{
- lockdep_assert_cpus_held();
return numa_update_cpu_topology(true);
}
@@ -1453,7 +1491,7 @@ static void topology_schedule_update(void)
schedule_work(&topology_work);
}
-static void topology_timer_fn(unsigned long ignored)
+static void topology_timer_fn(struct timer_list *unused)
{
if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
topology_schedule_update();
@@ -1463,14 +1501,11 @@ static void topology_timer_fn(unsigned long ignored)
reset_topology_timer();
}
}
-static struct timer_list topology_timer =
- TIMER_INITIALIZER(topology_timer_fn, 0, 0);
+static struct timer_list topology_timer;
static void reset_topology_timer(void)
{
- topology_timer.data = 0;
- topology_timer.expires = jiffies + 60 * HZ;
- mod_timer(&topology_timer, topology_timer.expires);
+ mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
}
#ifdef CONFIG_SMP
@@ -1519,18 +1554,18 @@ int start_topology_update(void)
if (firmware_has_feature(FW_FEATURE_PRRN)) {
if (!prrn_enabled) {
prrn_enabled = 1;
- vphn_enabled = 0;
#ifdef CONFIG_SMP
rc = of_reconfig_notifier_register(&dt_update_nb);
#endif
}
- } else if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ }
+ if (firmware_has_feature(FW_FEATURE_VPHN) &&
lppaca_shared_proc(get_lppaca())) {
if (!vphn_enabled) {
- prrn_enabled = 0;
vphn_enabled = 1;
setup_cpu_associativity_change_counters();
- init_timer_deferrable(&topology_timer);
+ timer_setup(&topology_timer, topology_timer_fn,
+ TIMER_DEFERRABLE);
reset_topology_timer();
}
}
@@ -1550,7 +1585,8 @@ int stop_topology_update(void)
#ifdef CONFIG_SMP
rc = of_reconfig_notifier_unregister(&dt_update_nb);
#endif
- } else if (vphn_enabled) {
+ }
+ if (vphn_enabled) {
vphn_enabled = 0;
rc = del_timer_sync(&topology_timer);
}
@@ -1613,9 +1649,17 @@ static int topology_update_init(void)
if (topology_updates_enabled)
start_topology_update();
+ if (vphn_enabled)
+ topology_schedule_update();
+
if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
return -ENOMEM;
+ topology_inited = 1;
+ if (topology_update_needed)
+ bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
+ nr_cpumask_bits);
+
return 0;
}
device_initcall(topology_update_init);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 39c252b54d16..cfbbee941a76 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -169,6 +169,16 @@ void radix__mark_rodata_ro(void)
{
unsigned long start, end;
+ /*
+ * mark_rodata_ro() will mark itself as !writable at some point.
+ * Due to DD1 workaround in radix__pte_update(), we'll end up with
+ * an invalid pte and the system will crash quite severly.
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ pr_warn("Warning: Unable to mark rodata read only on P9 DD1\n");
+ return;
+ }
+
start = (unsigned long)_stext;
end = (unsigned long)__init_begin;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 65eda1997c3f..f6c7f54c0515 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -361,9 +361,9 @@ static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
break;
}
wmb();
+ local_irq_restore(flags);
flush_tlb_kernel_range((unsigned long)page_address(start),
(unsigned long)page_address(page));
- local_irq_restore(flags);
return err;
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index ac0717a90ca6..813ea22c3e00 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -57,7 +57,7 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
@@ -404,7 +404,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
if (put_page_testzero(page)) {
if (!kernel)
pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
+ free_unref_page(page);
}
}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 906a86fe457b..2cf5ef3fc50d 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -167,7 +167,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
/*
* user space make sure we are within the allowed limit
*/
- ld r11,PACA_ADDR_LIMIT(r13)
+ ld r11,PACA_SLB_ADDR_LIMIT(r13)
cmpld r3,r11
bge- 8f
@@ -309,10 +309,6 @@ slb_compare_rr_to_size:
srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
rldimi r10,r9,ESID_BITS_1T,0
ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
- /*
- * bits above VSID_BITS_1T need to be ignored from r10
- * also combine VSID and flags
- */
li r10,MMU_SEGSIZE_1T
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 45f6740dd407..564fff06f5c1 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -96,7 +96,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
{
struct vm_area_struct *vma;
- if ((mm->task_size - len) < addr)
+ if ((mm->context.slb_addr_limit - len) < addr)
return 0;
vma = find_vma(mm, addr);
return (!vma || (addr + len) <= vm_start_gap(vma));
@@ -133,10 +133,10 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
if (!slice_low_has_vma(mm, i))
ret->low_slices |= 1u << i;
- if (mm->task_size <= SLICE_LOW_TOP)
+ if (mm->context.slb_addr_limit <= SLICE_LOW_TOP)
return;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++)
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++)
if (!slice_high_has_vma(mm, i))
__set_bit(i, ret->high_slices);
}
@@ -157,7 +157,7 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
ret->low_slices |= 1u << i;
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
mask_index = i & 0x1;
index = i >> 1;
if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
@@ -169,7 +169,7 @@ static int slice_check_fit(struct mm_struct *mm,
struct slice_mask mask, struct slice_mask available)
{
DECLARE_BITMAP(result, SLICE_NUM_HIGH);
- unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.addr_limit);
+ unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
bitmap_and(result, mask.high_slices,
available.high_slices, slice_count);
@@ -219,7 +219,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
mm->context.low_slices_psize = lpsizes;
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
mask_index = i & 0x1;
index = i >> 1;
if (test_bit(i, mask.high_slices))
@@ -329,8 +329,8 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* Only for that request for which high_limit is above
* DEFAULT_MAP_WINDOW we should apply this.
*/
- if (high_limit > DEFAULT_MAP_WINDOW)
- addr += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+ if (high_limit > DEFAULT_MAP_WINDOW)
+ addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
while (addr > PAGE_SIZE) {
info.high_limit = addr;
@@ -412,25 +412,31 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
struct slice_mask compat_mask;
int fixed = (flags & MAP_FIXED);
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ unsigned long page_size = 1UL << pshift;
struct mm_struct *mm = current->mm;
unsigned long newaddr;
unsigned long high_limit;
- /*
- * Check if we need to expland slice area.
- */
- if (unlikely(addr > mm->context.addr_limit &&
- mm->context.addr_limit != TASK_SIZE)) {
- mm->context.addr_limit = TASK_SIZE;
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
+
+ if (len > high_limit)
+ return -ENOMEM;
+ if (len & (page_size - 1))
+ return -EINVAL;
+ if (fixed) {
+ if (addr & (page_size - 1))
+ return -EINVAL;
+ if (addr > high_limit - len)
+ return -ENOMEM;
+ }
+
+ if (high_limit > mm->context.slb_addr_limit) {
+ mm->context.slb_addr_limit = high_limit;
on_each_cpu(slice_flush_segments, mm, 1);
}
- /*
- * This mmap request can allocate upt to 512TB
- */
- if (addr > DEFAULT_MAP_WINDOW)
- high_limit = mm->context.addr_limit;
- else
- high_limit = DEFAULT_MAP_WINDOW;
+
/*
* init different masks
*/
@@ -446,27 +452,19 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* Sanity checks */
BUG_ON(mm->task_size == 0);
+ BUG_ON(mm->context.slb_addr_limit == 0);
VM_BUG_ON(radix_enabled());
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
addr, len, flags, topdown);
- if (len > mm->task_size)
- return -ENOMEM;
- if (len & ((1ul << pshift) - 1))
- return -EINVAL;
- if (fixed && (addr & ((1ul << pshift) - 1)))
- return -EINVAL;
- if (fixed && addr > (mm->task_size - len))
- return -ENOMEM;
-
/* If hint, make sure it matches our alignment restrictions */
if (!fixed && addr) {
- addr = _ALIGN_UP(addr, 1ul << pshift);
+ addr = _ALIGN_UP(addr, page_size);
slice_dbg(" aligned addr=%lx\n", addr);
/* Ignore hint if it's too large or overlaps a VMA */
- if (addr > mm->task_size - len ||
+ if (addr > high_limit - len ||
!slice_area_is_free(mm, addr, len))
addr = 0;
}
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index b3e849c4886e..884f4b705b57 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -39,6 +39,20 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
+static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* raidx format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
@@ -70,22 +84,13 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
- unsigned long rb,rs,prs,r;
-
- rb = PPC_BIT(53); /* IS = 1 */
- rs = pid << PPC_BITLSHIFT(31);
- prs = 1; /* process scoped */
- r = 1; /* raidx format */
-
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ __tlbie_pid(pid, ric);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -95,14 +100,44 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
prs = 1; /* process scoped */
r = 1; /* raidx format */
- asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- asm volatile("ptesync": : :"memory");
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
-static inline void _tlbie_va(unsigned long va, unsigned long pid,
+static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
+static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ unsigned long ap = mmu_get_ap(psize);
+
+ asm volatile("ptesync": : :"memory");
+ __tlbiel_va(va, pid, ap, ric);
+ asm volatile("ptesync": : :"memory");
+}
+
+static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync": : :"memory");
+ if (also_pwc)
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+ __tlbiel_va_range(start, end, pid, page_size, psize);
+ asm volatile("ptesync": : :"memory");
+}
+
+static inline void __tlbie_va(unsigned long va, unsigned long pid,
unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -113,13 +148,43 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
prs = 1; /* process scoped */
r = 1; /* raidx format */
- asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- asm volatile("eieio; tlbsync; ptesync": : :"memory");
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
+static inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ unsigned long ap = mmu_get_ap(psize);
+
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, ric);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync": : :"memory");
+ if (also_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ __tlbie_va_range(start, end, pid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
/*
* Base TLB flushing operations:
*
@@ -144,7 +209,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
EXPORT_SYMBOL(radix__local_flush_tlb_mm);
#ifndef CONFIG_SMP
-static void radix__local_flush_all_mm(struct mm_struct *mm)
+void radix__local_flush_all_mm(struct mm_struct *mm)
{
unsigned long pid;
@@ -154,18 +219,18 @@ static void radix__local_flush_all_mm(struct mm_struct *mm)
_tlbiel_pid(pid, RIC_FLUSH_ALL);
preempt_enable();
}
+EXPORT_SYMBOL(radix__local_flush_all_mm);
#endif /* CONFIG_SMP */
void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize)
{
unsigned long pid;
- unsigned long ap = mmu_get_ap(psize);
preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
- _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
+ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
preempt_enable();
}
@@ -173,11 +238,10 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
{
#ifdef CONFIG_HUGETLB_PAGE
/* need the return fix for nohash.c */
- if (vma && is_vm_hugetlb_page(vma))
- return __local_flush_hugetlb_page(vma, vmaddr);
+ if (is_vm_hugetlb_page(vma))
+ return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
- radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_virtual_psize);
+ radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
@@ -186,36 +250,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
{
unsigned long pid;
- preempt_disable();
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
+ preempt_disable();
if (!mm_is_thread_local(mm))
_tlbie_pid(pid, RIC_FLUSH_TLB);
else
_tlbiel_pid(pid, RIC_FLUSH_TLB);
-no_context:
preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
-static void radix__flush_all_mm(struct mm_struct *mm)
+void radix__flush_all_mm(struct mm_struct *mm)
{
unsigned long pid;
- preempt_disable();
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
+ preempt_disable();
if (!mm_is_thread_local(mm))
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
_tlbiel_pid(pid, RIC_FLUSH_ALL);
-no_context:
preempt_enable();
}
+EXPORT_SYMBOL(radix__flush_all_mm);
void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
@@ -227,28 +290,26 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize)
{
unsigned long pid;
- unsigned long ap = mmu_get_ap(psize);
- preempt_disable();
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto bail;
+ return;
+
+ preempt_disable();
if (!mm_is_thread_local(mm))
- _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
+ _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
else
- _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
-bail:
+ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
preempt_enable();
}
void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
- if (vma && is_vm_hugetlb_page(vma))
- return flush_hugetlb_page(vma, vmaddr);
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_page(vma, vmaddr);
#endif
- radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_virtual_psize);
+ radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);
@@ -262,17 +323,86 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+#define TLB_FLUSH_ALL -1UL
+
/*
- * Currently, for range flushing, we just do a full mm flush. Because
- * we use this in code path where we don' track the page size.
+ * Number of pages above which we invalidate the entire PID rather than
+ * flush individual pages, for local and global flushes respectively.
+ *
+ * tlbie goes out to the interconnect and individual ops are more costly.
+ * It also does not iterate over sets like the local tlbiel variant when
+ * invalidating a full PID, so it has a far lower threshold to change from
+ * individual page flushes to full-pid flushes.
*/
+static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ if (mm_is_thread_local(mm)) {
+ local = true;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_local_single_page_flush_ceiling);
+ } else {
+ local = false;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_single_page_flush_ceiling);
+ }
- radix__flush_tlb_mm(mm);
+ if (full) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ bool hflush = false;
+ unsigned long hstart, hend;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
+ hend = end >> HPAGE_PMD_SHIFT;
+ if (hstart < hend) {
+ hstart <<= HPAGE_PMD_SHIFT;
+ hend <<= HPAGE_PMD_SHIFT;
+ hflush = true;
+ }
+#endif
+
+ asm volatile("ptesync": : :"memory");
+ if (local) {
+ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbiel_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
+ preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_range);
@@ -291,77 +421,100 @@ static int radix_get_mmu_psize(int page_size)
return psize;
}
+static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
+
void radix__tlb_flush(struct mmu_gather *tlb)
{
int psize = 0;
struct mm_struct *mm = tlb->mm;
int page_size = tlb->page_size;
- psize = radix_get_mmu_psize(page_size);
/*
* if page size is not something we understand, do a full mm flush
+ *
+ * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
+ * that flushes the process table entry cache upon process teardown.
+ * See the comment for radix in arch_exit_mmap().
*/
- if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
- radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
- else if (tlb->need_flush_all) {
- tlb->need_flush_all = 0;
+ if (tlb->fullmm) {
radix__flush_all_mm(mm);
- } else
- radix__flush_tlb_mm(mm);
-}
+ } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_mm(mm);
+ else
+ radix__flush_all_mm(mm);
+ } else {
+ unsigned long start = tlb->start;
+ unsigned long end = tlb->end;
-#define TLB_FLUSH_ALL -1UL
-/*
- * Number of pages above which we will do a bcast tlbie. Just a
- * number at this point copied from x86
- */
-static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_range_psize(mm, start, end, psize);
+ else
+ radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
+ }
+ tlb->need_flush_all = 0;
+}
-void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize)
+static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ int psize, bool also_pwc)
{
unsigned long pid;
- unsigned long addr;
- int local = mm_is_thread_local(mm);
- unsigned long ap = mmu_get_ap(psize);
- unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+ unsigned int page_shift = mmu_psize_defs[psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
preempt_disable();
- pid = mm ? mm->context.id : 0;
- if (unlikely(pid == MMU_NO_CONTEXT))
- goto err_out;
+ if (mm_is_thread_local(mm)) {
+ local = true;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_local_single_page_flush_ceiling);
+ } else {
+ local = false;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_single_page_flush_ceiling);
+ }
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ if (full) {
if (local)
- _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
- goto err_out;
- }
- for (addr = start; addr < end; addr += page_size) {
-
+ _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL: RIC_FLUSH_TLB);
+ } else {
if (local)
- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
else
- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}
-err_out:
preempt_enable();
}
+void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
+}
+
+static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ __radix__flush_tlb_range_psize(mm, start, end, psize, true);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
- int local = mm_is_thread_local(mm);
- unsigned long ap = mmu_get_ap(mmu_virtual_psize);
unsigned long pid, end;
-
- pid = mm ? mm->context.id : 0;
+ pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
+ return;
/* 4k page size, just blow the world */
if (PAGE_SIZE == 0x1000) {
@@ -369,21 +522,17 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
return;
}
- /* Otherwise first do the PWC */
- if (local)
- _tlbiel_pid(pid, RIC_FLUSH_PWC);
- else
- _tlbie_pid(pid, RIC_FLUSH_PWC);
-
- /* Then iterate the pages */
end = addr + HPAGE_PMD_SIZE;
- for (; addr < end; addr += PAGE_SIZE) {
- if (local)
- _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
- else
- _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+ /* Otherwise first do the PWC, then iterate the pages. */
+ preempt_disable();
+
+ if (mm_is_thread_local(mm)) {
+ _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ } else {
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
}
-no_context:
+
preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/vphn.c
index 5f8ef50e5c66..f83044faac23 100644
--- a/arch/powerpc/mm/vphn.c
+++ b/arch/powerpc/mm/vphn.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <asm/byteorder.h>
#include "vphn.h"
diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/vphn.h
index fe8b7805b78f..f9ffdb3942fc 100644
--- a/arch/powerpc/mm/vphn.h
+++ b/arch/powerpc/mm/vphn.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ARCH_POWERPC_MM_VPHN_H_
#define _ARCH_POWERPC_MM_VPHN_H_