author     Nicholas Piggin <npiggin@gmail.com>      2017-11-07 18:53:07 +1100
committer  Michael Ellerman <mpe@ellerman.id.au>    2017-11-10 21:33:33 +1100
commit     cbf09c837720f72f5e63ab7a2d331ec6cc9a3417
tree       7af2917fe54977c8068dcc75d3cfa65fd4638a48   /arch/powerpc/mm
parent     d665767e39fa4a9e725f92d77ba2060c5ce273dc
powerpc/64s/radix: Optimize flush_tlb_range
Currently for radix, flush_tlb_range flushes the entire PID, because the
Linux mm code does not tell us about page size here for THP vs regular
pages. This is quite sub-optimal for small mremap / mprotect /
change_protection.

So implement va range flushes with two flush passes, one for each page
size (regular and THP). The second flush has an order of magnitude fewer
tlbie instructions than the first, so it is a relatively small additional
cost.

There is still room for improvement here with some changes to generic
APIs: in particular, if there are mostly THP pages to be invalidated,
the small page flushes could be reduced.

Time to mprotect 1 page of memory (after mmap, touch):
vanilla 2.9us   1.8us
patched 1.2us   1.6us

Time to mprotect 30 pages of memory (after mmap, touch):
vanilla 8.2us   7.2us
patched 6.9us   17.9us

Time to mprotect 34 pages of memory (after mmap, touch):
vanilla 9.1us   8.0us
patched 9.0us   8.0us

34 pages is the point at which the invalidation switches from va to
entire PID, which tlbie can do in a single instruction. This is why, in
the case of 30 pages, the new code runs slower for this test. This is a
deliberate tradeoff already present in the unmap and THP promotion code:
the benefit of avoiding a flush of the entire TLB for this PID on all
threads in the system outweighs the extra cost of the va flushes.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
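The flush strategy above can be summarized outside the kernel. The sketch
below is a minimal, hypothetical userspace model of what
radix__flush_tlb_range now does, not kernel code: flush_page() and
flush_pid() are stand-in names for the tlbiel/tlbie sequences, a 64K base
page size is assumed, and the ceiling of 33 mirrors
tlb_single_page_flush_ceiling in the patch below.

/*
 * Illustrative sketch only, not part of the patch: a plain C model of
 * the two-pass range flush with a full-PID fallback.
 */
#include <stdio.h>

#define PAGE_SIZE	(64 * 1024UL)		/* assumed base page size */
#define PMD_SIZE	(2 * 1024 * 1024UL)	/* THP (PMD) size on radix */
#define CEILING		33UL			/* single-page flush ceiling */

static void flush_page(unsigned long va, unsigned long size)
{
	printf("flush va 0x%lx, page size 0x%lx\n", va, size);
}

static void flush_pid(void)
{
	printf("flush entire PID\n");
}

static void model_flush_range(unsigned long start, unsigned long end)
{
	unsigned long nr_pages = (end - start) / PAGE_SIZE;
	unsigned long addr, hstart, hend;

	if (nr_pages > CEILING) {
		/* Above the ceiling, one full-PID invalidate is cheaper. */
		flush_pid();
		return;
	}

	/* 2M-aligned sub-range that could contain THP mappings. */
	hstart = (start + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
	hend = end & ~(PMD_SIZE - 1);

	/* Pass 1: base page size across the whole range. */
	for (addr = start; addr < end; addr += PAGE_SIZE)
		flush_page(addr, PAGE_SIZE);

	/*
	 * Pass 2: PMD page size across the aligned sub-range; an order of
	 * magnitude fewer operations than pass 1.
	 */
	for (addr = hstart; addr < hend; addr += PMD_SIZE)
		flush_page(addr, PMD_SIZE);
}

int main(void)
{
	/* e.g. an 8-page mprotect: below the ceiling, so per-page flushes */
	model_flush_range(0x10000000UL, 0x10000000UL + 8 * PAGE_SIZE);
	return 0;
}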
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/tlb-radix.c  138
1 file changed, 103 insertions, 35 deletions
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 9916ea2fff43..b4b49de551a9 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -100,6 +100,17 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
+static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
@@ -114,12 +125,8 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
unsigned long psize)
{
- unsigned long addr;
- unsigned long ap = mmu_get_ap(psize);
-
asm volatile("ptesync": : :"memory");
- for (addr = start; addr < end; addr += page_size)
- __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ __tlbiel_va_range(start, end, pid, page_size, psize);
asm volatile("ptesync": : :"memory");
}
@@ -139,6 +146,17 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
static inline void _tlbie_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
@@ -153,12 +171,8 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
unsigned long psize)
{
- unsigned long addr;
- unsigned long ap = mmu_get_ap(psize);
-
asm volatile("ptesync": : :"memory");
- for (addr = start; addr < end; addr += page_size)
- __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ __tlbie_va_range(start, end, pid, page_size, psize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -300,17 +314,78 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+#define TLB_FLUSH_ALL -1UL
+
/*
- * Currently, for range flushing, we just do a full mm flush. Because
- * we use this in code path where we don' track the page size.
+ * Number of pages above which we invalidate the entire PID rather than
+ * flush individual pages, for local and global flushes respectively.
+ *
+ * tlbie goes out to the interconnect and individual ops are more costly.
+ * It also does not iterate over sets like the local tlbiel variant when
+ * invalidating a full PID, so it has a far lower threshold to change from
+ * individual page flushes to full-pid flushes.
*/
+static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
- radix__flush_tlb_mm(mm);
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ local = mm_is_thread_local(mm);
+ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
+
+ if (full) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ bool hflush = false;
+ unsigned long hstart, hend;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
+ hend = end >> HPAGE_PMD_SHIFT;
+ if (hstart < hend) {
+ hstart <<= HPAGE_PMD_SHIFT;
+ hend <<= HPAGE_PMD_SHIFT;
+ hflush = true;
+ }
+#endif
+
+ asm volatile("ptesync": : :"memory");
+ if (local) {
+ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbiel_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
+ preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_range);
@@ -352,19 +427,14 @@ void radix__tlb_flush(struct mmu_gather *tlb)
radix__flush_tlb_mm(mm);
}
-#define TLB_FLUSH_ALL -1UL
-/*
- * Number of pages above which we will do a bcast tlbie. Just a
- * number at this point copied from x86
- */
-static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
-
void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize)
{
unsigned long pid;
- bool local;
- unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+ unsigned int page_shift = mmu_psize_defs[psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
@@ -372,8 +442,9 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
preempt_disable();
local = mm_is_thread_local(mm);
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
+
+ if (full) {
if (local)
_tlbiel_pid(pid, RIC_FLUSH_TLB);
else
@@ -391,7 +462,6 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
unsigned long pid, end;
- bool local;
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
@@ -403,20 +473,18 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
return;
}
+ end = addr + HPAGE_PMD_SIZE;
+
+ /* Otherwise first do the PWC, then iterate the pages. */
preempt_disable();
- local = mm_is_thread_local(mm);
- /* Otherwise first do the PWC */
- if (local)
- _tlbiel_pid(pid, RIC_FLUSH_PWC);
- else
- _tlbie_pid(pid, RIC_FLUSH_PWC);
- /* Then iterate the pages */
- end = addr + HPAGE_PMD_SIZE;
- if (local)
+ if (mm_is_thread_local(mm)) {
+ _tlbiel_pid(pid, RIC_FLUSH_PWC);
_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
- else
+ } else {
+ _tlbie_pid(pid, RIC_FLUSH_PWC);
_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
+ }
preempt_enable();
}
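A note on the THP sub-range computed in radix__flush_tlb_range above:
hstart is start rounded up to HPAGE_PMD_SIZE and hend is end rounded
down, so the second flush pass only visits addresses that could actually
be mapped by a PMD, and it is skipped entirely (hflush stays false) when
the rounded range is empty. Below is a minimal standalone check of that
rounding, assuming 2M PMD huge pages; the shift value and the example
addresses are illustrative, not taken from the kernel headers.

#include <assert.h>

#define HPAGE_PMD_SHIFT	21	/* assumes 2M PMD huge pages on radix */
#define HPAGE_PMD_SIZE	(1UL << HPAGE_PMD_SHIFT)

int main(void)
{
	/* arbitrary example range for illustration */
	unsigned long start = 0x100000, end = 0x7f0000;
	unsigned long hstart, hend;

	/* same rounding as the patch: start up, end down, to 2M */
	hstart = ((start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT) << HPAGE_PMD_SHIFT;
	hend = (end >> HPAGE_PMD_SHIFT) << HPAGE_PMD_SHIFT;

	assert(hstart == 0x200000);	/* 0x100000 rounds up to 2M */
	assert(hend == 0x600000);	/* 0x7f0000 rounds down to 6M */
	return 0;
}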