From f048aace29e007f2b642097e2da8231e0e9cce2d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 18 Dec 2008 19:13:38 +0000 Subject: powerpc/mm: Add SMP support to no-hash TLB handling This commit moves the whole no-hash TLB handling out of line into a new tlb_nohash.c file, and implements some basic SMP support using IPIs and/or broadcast tlbivax instructions. Note that I'm using local invalidations for D->I cache coherency. At worst, if another processor is trying to execute the same and has the old entry in its TLB, it will just take a fault and re-do the TLB flush locally (it won't re-do the cache flush in any case). Signed-off-by: Benjamin Herrenschmidt Acked-by: Kumar Gala Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/highmem.h | 4 +- arch/powerpc/include/asm/mmu.h | 16 +++ arch/powerpc/include/asm/tlbflush.h | 84 +++++++-------- arch/powerpc/kernel/misc_32.S | 9 ++ arch/powerpc/kernel/ppc_ksyms.c | 6 -- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/powerpc/mm/tlb_hash32.c | 4 + arch/powerpc/mm/tlb_nohash.c | 209 ++++++++++++++++++++++++++++++++++++ 10 files changed, 281 insertions(+), 57 deletions(-) create mode 100644 arch/powerpc/mm/tlb_nohash.c diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 7dc52eca8b67..fd97e501aa6a 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -85,7 +85,7 @@ static inline void *kmap_atomic_prot(struct page *page, enum km_type type, pgpro BUG_ON(!pte_none(*(kmap_pte-idx))); #endif __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); - local_flush_tlb_page(vaddr); + local_flush_tlb_page(NULL, vaddr); return (void*) vaddr; } @@ -113,7 +113,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) * this pte without first remap it */ pte_clear(&init_mm, vaddr, kmap_pte-idx); - local_flush_tlb_page(vaddr); + local_flush_tlb_page(NULL, vaddr); #endif pagefault_enable(); } diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index dc8c0aef5e6c..6e7639911318 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -30,6 +30,22 @@ */ #define MMU_FTR_BIG_PHYS ASM_CONST(0x00020000) +/* Enable use of broadcast TLB invalidations. We don't always set it + * on processors that support it due to other constraints with the + * use of such invalidations + */ +#define MMU_FTR_USE_TLBIVAX_BCAST ASM_CONST(0x00040000) + +/* Enable use of tlbilx invalidate-by-PID variant. + */ +#define MMU_FTR_USE_TLBILX_PID ASM_CONST(0x00080000) + +/* This indicates that the processor cannot handle multiple outstanding + * broadcast tlbivax or tlbsync. This makes the code use a spinlock + * around such invalidate forms. + */ +#define MMU_FTR_LOCK_BCAST_INVAL ASM_CONST(0x00100000) + #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 9ed363d3de44..8c39b27c1ed7 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -6,7 +6,9 @@ * * - flush_tlb_mm(mm) flushes the specified mm context TLB's * - flush_tlb_page(vma, vmaddr) flushes one page - * - local_flush_tlb_page(vmaddr) flushes one page on the local processor + * - local_flush_tlb_mm(mm) flushes the specified mm context on + * the local processor + * - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages @@ -18,7 +20,7 @@ */ #ifdef __KERNEL__ -#if defined(CONFIG_4xx) || defined(CONFIG_8xx) || defined(CONFIG_FSL_BOOKE) +#ifdef CONFIG_PPC_MMU_NOHASH /* * TLB flushing for software loaded TLB chips * @@ -31,10 +33,10 @@ #define MMU_NO_CONTEXT ((unsigned int)-1) -extern void _tlbie(unsigned long address, unsigned int pid); extern void _tlbil_all(void); extern void _tlbil_pid(unsigned int pid); extern void _tlbil_va(unsigned long address, unsigned int pid); +extern void _tlbivax_bcast(unsigned long address, unsigned int pid); #if defined(CONFIG_40x) || defined(CONFIG_8xx) #define _tlbia() asm volatile ("tlbia; sync" : : : "memory") @@ -42,48 +44,26 @@ extern void _tlbil_va(unsigned long address, unsigned int pid); extern void _tlbia(void); #endif -static inline void local_flush_tlb_mm(struct mm_struct *mm) -{ - _tlbil_pid(mm->context.id); -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - _tlbil_pid(mm->context.id); -} - -static inline void local_flush_tlb_page(unsigned long vmaddr) -{ - _tlbil_va(vmaddr, 0); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - _tlbil_va(vmaddr, vma ? vma->vm_mm->context.id : 0); -} +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); -static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - flush_tlb_page(vma, vmaddr); -} +extern void local_flush_tlb_mm(struct mm_struct *mm); +extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - _tlbil_pid(vma->vm_mm->context.id); -} +#ifdef CONFIG_SMP +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +#else +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr) +#endif +#define flush_tlb_page_nohash(vma,addr) flush_tlb_page(vma,addr) -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - _tlbil_pid(0); -} +#elif defined(CONFIG_PPC_STD_MMU_32) -#elif defined(CONFIG_PPC32) /* - * TLB flushing for "classic" hash-MMMU 32-bit CPUs, 6xx, 7xx, 7xxx + * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx */ extern void _tlbie(unsigned long address); extern void _tlbia(void); @@ -94,14 +74,20 @@ extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); -static inline void local_flush_tlb_page(unsigned long vmaddr) +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) { - flush_tlb_page(NULL, vmaddr); + flush_tlb_page(vma, vmaddr); +} +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ + flush_tlb_mm(mm); } -#else +#elif defined(CONFIG_PPC_STD_MMU_64) + /* - * TLB flushing for 64-bit has-MMU CPUs + * TLB flushing for 64-bit hash-MMU CPUs */ #include @@ -151,11 +137,16 @@ extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize, extern void flush_hash_range(unsigned long number, int local); +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ +} + static inline void flush_tlb_mm(struct mm_struct *mm) { } -static inline void local_flush_tlb_page(unsigned long vmaddr) +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) { } @@ -183,7 +174,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, unsigned long end); - +#else +#error Unsupported MMU type #endif #endif /*__KERNEL__ */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 5c33bc14bd9f..2c2ab89f0b64 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -29,6 +29,7 @@ #include #include #include +#include .text @@ -496,6 +497,14 @@ _GLOBAL(_tlbil_va) blr #endif /* CONFIG_FSL_BOOKE */ +/* + * Nobody implements this yet + */ +_GLOBAL(_tlbivax_bcast) +1: trap + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; + blr + /* * Flush instruction cache. diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 341b3d3048e0..dcec1325d340 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -116,12 +116,6 @@ EXPORT_SYMBOL(giveup_spe); #ifndef CONFIG_PPC64 EXPORT_SYMBOL(flush_instruction_cache); -EXPORT_SYMBOL(flush_tlb_kernel_range); -EXPORT_SYMBOL(flush_tlb_page); -EXPORT_SYMBOL(_tlbie); -#if defined(CONFIG_4xx) || defined(CONFIG_8xx) || defined(CONFIG_FSL_BOOKE) -EXPORT_SYMBOL(_tlbil_va); -#endif #endif EXPORT_SYMBOL(__flush_icache_range); EXPORT_SYMBOL(flush_dcache_range); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 923bd3fa7d64..af987df8d5a3 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -9,7 +9,7 @@ endif obj-y := fault.o mem.o pgtable.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o -obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o +obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ slb_low.o slb.o stab.o \ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7df0409107ad..87f1f955dea4 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -284,7 +284,7 @@ good_area: } pte_update(ptep, 0, _PAGE_HWEXEC | _PAGE_ACCESSED); - _tlbie(address, mm->context.id); + local_flush_tlb_page(vma, address); pte_unmap_unlock(ptep, ptl); up_read(&mm->mmap_sem); return 0; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index b9e1a1da6e52..8fee696fb795 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -488,7 +488,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, * we invalidate the TLB here, thus avoiding dcbst * misbehaviour. */ - _tlbie(address, 0 /* 8xx doesn't care about PID */); + _tlbil_va(address, 0 /* 8xx doesn't care about PID */); #endif /* The _PAGE_USER test should really be _PAGE_EXEC, but * older glibc versions execute some code from no-exec diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index f9a47fee3927..65190587a365 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -137,6 +137,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) flush_range(&init_mm, start, end); FINISH_FLUSH; } +EXPORT_SYMBOL(flush_tlb_kernel_range); /* * Flush all the (user) entries for the address space described by mm. @@ -160,6 +161,7 @@ void flush_tlb_mm(struct mm_struct *mm) flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); FINISH_FLUSH; } +EXPORT_SYMBOL(flush_tlb_mm); void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { @@ -176,6 +178,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); FINISH_FLUSH; } +EXPORT_SYMBOL(flush_tlb_page); /* * For each address in the range, find the pte for the address @@ -188,3 +191,4 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, flush_range(vma->vm_mm, start, end); FINISH_FLUSH; } +EXPORT_SYMBOL(flush_tlb_range); diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c new file mode 100644 index 000000000000..803a64c02b06 --- /dev/null +++ b/arch/powerpc/mm/tlb_nohash.c @@ -0,0 +1,209 @@ +/* + * This file contains the routines for TLB flushing. + * On machines where the MMU does not use a hash table to store virtual to + * physical translations (ie, SW loaded TLBs or Book3E compilant processors, + * this does -not- include 603 however which shares the implementation with + * hash based processors) + * + * -- BenH + * + * Copyright 2008 Ben Herrenschmidt + * IBM Corp. + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mmu_decl.h" + +/* + * Base TLB flushing operations: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * - local_* variants of page and mm only apply to the current + * processor + */ + +/* + * These are the base non-SMP variants of page and mm flushing + */ +void local_flush_tlb_mm(struct mm_struct *mm) +{ + unsigned int pid; + + preempt_disable(); + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbil_pid(pid); + preempt_enable(); +} +EXPORT_SYMBOL(local_flush_tlb_mm); + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + unsigned int pid; + + preempt_disable(); + pid = vma ? vma->vm_mm->context.id : 0; + if (pid != MMU_NO_CONTEXT) + _tlbil_va(vmaddr, pid); + preempt_enable(); +} +EXPORT_SYMBOL(local_flush_tlb_page); + + +/* + * And here are the SMP non-local implementations + */ +#ifdef CONFIG_SMP + +static DEFINE_SPINLOCK(tlbivax_lock); + +struct tlb_flush_param { + unsigned long addr; + unsigned int pid; +}; + +static void do_flush_tlb_mm_ipi(void *param) +{ + struct tlb_flush_param *p = param; + + _tlbil_pid(p ? p->pid : 0); +} + +static void do_flush_tlb_page_ipi(void *param) +{ + struct tlb_flush_param *p = param; + + _tlbil_va(p->addr, p->pid); +} + + +/* Note on invalidations and PID: + * + * We snapshot the PID with preempt disabled. At this point, it can still + * change either because: + * - our context is being stolen (PID -> NO_CONTEXT) on another CPU + * - we are invaliating some target that isn't currently running here + * and is concurrently acquiring a new PID on another CPU + * - some other CPU is re-acquiring a lost PID for this mm + * etc... + * + * However, this shouldn't be a problem as we only guarantee + * invalidation of TLB entries present prior to this call, so we + * don't care about the PID changing, and invalidating a stale PID + * is generally harmless. + */ + +void flush_tlb_mm(struct mm_struct *mm) +{ + cpumask_t cpu_mask; + unsigned int pid; + + preempt_disable(); + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) { + struct tlb_flush_param p = { .pid = pid }; + smp_call_function_mask(cpu_mask, do_flush_tlb_mm_ipi, &p, 1); + } + _tlbil_pid(pid); + no_context: + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_mm); + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + cpumask_t cpu_mask; + unsigned int pid; + + preempt_disable(); + pid = vma ? vma->vm_mm->context.id : 0; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto bail; + cpu_mask = vma->vm_mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) { + /* If broadcast tlbivax is supported, use it */ + if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { + int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); + if (lock) + spin_lock(&tlbivax_lock); + _tlbivax_bcast(vmaddr, pid); + if (lock) + spin_unlock(&tlbivax_lock); + goto bail; + } else { + struct tlb_flush_param p = { .pid = pid, .addr = vmaddr }; + smp_call_function_mask(cpu_mask, + do_flush_tlb_page_ipi, &p, 1); + } + } + _tlbil_va(vmaddr, pid); + bail: + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_page); + +#endif /* CONFIG_SMP */ + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ +#ifdef CONFIG_SMP + preempt_disable(); + smp_call_function(do_flush_tlb_mm_ipi, NULL, 1); + _tlbil_pid(0); + preempt_enable(); +#endif + _tlbil_pid(0); +} +EXPORT_SYMBOL(flush_tlb_kernel_range); + +/* + * Currently, for range flushing, we just do a full mm flush. This should + * be optimized based on a threshold on the size of the range, since + * some implementation can stack multiple tlbivax before a tlbsync but + * for now, we keep it that way + */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) + +{ + flush_tlb_mm(vma->vm_mm); +} +EXPORT_SYMBOL(flush_tlb_range); -- cgit