summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nicholas Piggin <npiggin@gmail.com> 2017-11-07 18:53:08 +1100
committer Michael Ellerman <mpe@ellerman.id.au> 2017-11-10 21:33:35 +1100
commit f6f27951fdf84a6edca3ea14077268ad629b57ac (patch)
tree 34094279dbe3a00bc60d4a463c9d32449e294de4
parent cbf09c837720f72f5e63ab7a2d331ec6cc9a3417 (diff)
powerpc/64s/radix: Introduce local single page ceiling for TLB range flush
The single page flush ceiling is the cut-off point at which we switch from invalidating individual pages, to invalidating the entire process address space in response to a range flush.

Introduce a local variant of this heuristic because local and global tlbie have significantly different properties:
- Local tlbiel requires 128 instructions to invalidate a PID, global tlbie only 1 instruction.
- Global tlbie instructions are expensive broadcast operations.

The local ceiling has been made much higher, 2x the number of instructions required to invalidate the entire PID (i.e., 256 pages).

Time to mprotect N pages of memory (after mmap, touch), local invalidate:

N            32      34      64     128     256     512
vanilla   7.4us   9.0us  14.6us  26.4us  50.2us  98.3us
patched   7.4us   7.8us  13.8us  26.4us  51.9us  98.3us

The behaviour of both is identical at N=32 and N=512. Between there, the vanilla kernel does a PID invalidate and the patched kernel does a va range invalidate.

At N=128, these require the same number of tlbiel instructions, so the patched version can be seen to be cheaper when N < 128, and more expensive when N > 128. However this does not well capture the cost of invalidated TLB.

The additional cost at 256 pages does not seem prohibitive. It may be the case that increasing the limit further would continue to be beneficial to avoid invalidating all of the process's TLB entries.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--arch/powerpc/mm/tlb-radix.c23
1 files changed, 19 insertions, 4 deletions
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index b4b49de551a9..cfa08da534a7 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -326,6 +326,7 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
* individual page flushes to full-pid flushes.
*/
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
@@ -348,8 +349,15 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
return;
preempt_disable();
- local = mm_is_thread_local(mm);
- full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
+ if (mm_is_thread_local(mm)) {
+ local = true;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_local_single_page_flush_ceiling);
+ } else {
+ local = false;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_single_page_flush_ceiling);
+ }
if (full) {
if (local)
@@ -441,8 +449,15 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
return;
preempt_disable();
- local = mm_is_thread_local(mm);
- full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
+ if (mm_is_thread_local(mm)) {
+ local = true;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_local_single_page_flush_ceiling);
+ } else {
+ local = false;
+ full = (end == TLB_FLUSH_ALL ||
+ nr_pages > tlb_single_page_flush_ceiling);
+ }
if (full) {
if (local)