diff options
Diffstat (limited to 'arch/arm64/include/asm/tlbflush.h')
| -rw-r--r-- | arch/arm64/include/asm/tlbflush.h | 417 |
1 files changed, 311 insertions, 106 deletions
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index cc3f5a33ff9c..a2d65d7d6aae 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -8,11 +8,12 @@ #ifndef __ASM_TLBFLUSH_H #define __ASM_TLBFLUSH_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bitfield.h> #include <linux/mm_types.h> #include <linux/sched.h> +#include <linux/mmu_notifier.h> #include <asm/cputype.h> #include <asm/mmu.h> @@ -93,19 +94,22 @@ static inline unsigned long get_trans_granule(void) * When ARMv8.4-TTL exists, TLBI operations take an additional hint for * the level at which the invalidation must take place. If the level is * wrong, no invalidation may take place. In the case where the level - * cannot be easily determined, a 0 value for the level parameter will - * perform a non-hinted invalidation. + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform + * a non-hinted invalidation. Any provided level outside the hint range + * will also cause fall-back to non-hinted invalidation. * * For Stage-2 invalidation, use the level values provided to that effect * in asm/stage2_pgtable.h. */ #define TLBI_TTL_MASK GENMASK_ULL(47, 44) +#define TLBI_TTL_UNKNOWN INT_MAX + #define __tlbi_level(op, addr, level) do { \ u64 arg = addr; \ \ - if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \ - level) { \ + if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \ + level >= 0 && level <= 3) { \ u64 ttl = level & 3; \ ttl |= get_trans_granule() << 2; \ arg &= ~TLBI_TTL_MASK; \ @@ -121,28 +125,41 @@ static inline unsigned long get_trans_granule(void) } while (0) /* - * This macro creates a properly formatted VA operand for the TLB RANGE. - * The value bit assignments are: + * This macro creates a properly formatted VA operand for the TLB RANGE. The + * value bit assignments are: * * +----------+------+-------+-------+-------+----------------------+ * | ASID | TG | SCALE | NUM | TTL | BADDR | * +-----------------+-------+-------+-------+----------------------+ * |63 48|47 46|45 44|43 39|38 37|36 0| * - * The address range is determined by below formula: - * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) + * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) * + * 2^(5*SCALE + 1) * PAGESIZE) + * + * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR + * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI + * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA, + * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= (unsigned long)(ttl) << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define TLBIR_ASID_MASK GENMASK_ULL(63, 48) +#define TLBIR_TG_MASK GENMASK_ULL(47, 46) +#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44) +#define TLBIR_NUM_MASK GENMASK_ULL(43, 39) +#define TLBIR_TTL_MASK GENMASK_ULL(38, 37) +#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0) + +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = 0; \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \ + __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \ + __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \ + __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \ + __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \ + __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -151,12 +168,18 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only + * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If + * 'pages' is more than that, you must iterate over the overall + * range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + int __pages = min((pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (__pages >> (5 * (scale) + 1)) - 1; \ + }) /* * TLB Invalidation @@ -215,13 +238,30 @@ static inline unsigned long get_trans_granule(void) * CPUs, ensuring that any walk-cache entries associated with the * translation are also invalidated. * - * __flush_tlb_range(vma, start, end, stride, last_level) + * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level) * Invalidate the virtual-address range '[start, end)' on all * CPUs for the user address space corresponding to 'vma->mm'. * The invalidation operations are issued at a granularity * determined by 'stride' and only affect any walk-cache entries - * if 'last_level' is equal to false. + * if 'last_level' is equal to false. tlb_level is the level at + * which the invalidation must take place. If the level is wrong, + * no invalidation may take place. In the case where the level + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will + * perform a non-hinted invalidation. + * + * local_flush_tlb_page(vma, addr) + * Local variant of flush_tlb_page(). Stale TLB entries may + * remain in remote CPUs. * + * local_flush_tlb_page_nonotify(vma, addr) + * Same as local_flush_tlb_page() except MMU notifier will not be + * called. + * + * local_flush_tlb_contpte(vma, addr) + * Invalidate the virtual-address range + * '[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU + * for the user address space corresponding to 'vma->mm'. Stale + * TLB entries may remain in remote CPUs. * * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented * on top of these routines, since that is our interface to the mmu_gather @@ -245,22 +285,60 @@ static inline void flush_tlb_all(void) static inline void flush_tlb_mm(struct mm_struct *mm) { - unsigned long asid = __TLBI_VADDR(0, ASID(mm)); + unsigned long asid; dsb(ishst); + asid = __TLBI_VADDR(0, ASID(mm)); __tlbi(aside1is, asid); __tlbi_user(aside1is, asid); dsb(ish); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } -static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, - unsigned long uaddr) +static inline void __local_flush_tlb_page_nonotify_nosync(struct mm_struct *mm, + unsigned long uaddr) +{ + unsigned long addr; + + dsb(nshst); + addr = __TLBI_VADDR(uaddr, ASID(mm)); + __tlbi(vale1, addr); + __tlbi_user(vale1, addr); +} + +static inline void local_flush_tlb_page_nonotify(struct vm_area_struct *vma, + unsigned long uaddr) +{ + __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr); + dsb(nsh); +} + +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long uaddr) +{ + __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr); + mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, uaddr & PAGE_MASK, + (uaddr & PAGE_MASK) + PAGE_SIZE); + dsb(nsh); +} + +static inline void __flush_tlb_page_nosync(struct mm_struct *mm, + unsigned long uaddr) { - unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); + unsigned long addr; dsb(ishst); + addr = __TLBI_VADDR(uaddr, ASID(mm)); __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK, + (uaddr & PAGE_MASK) + PAGE_SIZE); +} + +static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, + unsigned long uaddr) +{ + return __flush_tlb_page_nosync(vma->vm_mm, uaddr); } static inline void flush_tlb_page(struct vm_area_struct *vma, @@ -270,121 +348,215 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ish); } +static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) +{ + /* + * TLB flush deferral is not required on systems which are affected by + * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation + * will have two consecutive TLBI instructions with a dsb(ish) in between + * defeating the purpose (i.e save overall 'dsb ish' cost). + */ + if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) + return false; + + return true; +} + +/* + * To support TLB batched flush for multiple pages unmapping, we only send + * the TLBI for each page in arch_tlbbatch_add_pending() and wait for the + * completion at the end in arch_tlbbatch_flush(). Since we've already issued + * TLBI for each page so only a DSB is needed to synchronise its effect on the + * other CPUs. + * + * This will save the time waiting on DSB comparing issuing a TLBI;DSB sequence + * for each page. + */ +static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +{ + dsb(ish); +} + /* * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLBI_OPS PTRS_PER_PTE +#define MAX_DVM_OPS PTRS_PER_PTE -static inline void __flush_tlb_range(struct vm_area_struct *vma, +/* + * __flush_tlb_range_op - Perform TLBI operation upon a range + * + * @op: TLBI instruction that operates on a range (has 'r' prefix) + * @start: The start address of the range + * @pages: Range as the number of pages from 'start' + * @stride: Flush granularity + * @asid: The ASID of the task (0 for IPA instructions) + * @tlb_level: Translation Table level hint, if known + * @tlbi_user: If 'true', call an additional __tlbi_user() + * (typically for user ASIDs). 'flase' for IPA instructions + * @lpa2: If 'true', the lpa2 scheme is used as set out below + * + * When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the TLB + * range ops are supported, then: + * + * 1. If FEAT_LPA2 is in use, the start address of a range operation must be + * 64KB aligned, so flush pages one by one until the alignment is reached + * using the non-range operations. This step is skipped if LPA2 is not in + * use. + * + * 2. The minimum range granularity is decided by 'scale', so multiple range + * TLBI operations may be required. Start from scale = 3, flush the largest + * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the + * requested range, then decrement scale and continue until one or zero pages + * are left. We must start from highest scale to ensure 64KB start alignment + * is maintained in the LPA2 case. + * + * 3. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. We save this for last to + * ensure 64KB start alignment is maintained for the LPA2 case. + */ +#define __flush_tlb_range_op(op, start, pages, stride, \ + asid, tlb_level, tlbi_user, lpa2) \ +do { \ + typeof(start) __flush_start = start; \ + typeof(pages) __flush_pages = pages; \ + int num = 0; \ + int scale = 3; \ + int shift = lpa2 ? 16 : PAGE_SHIFT; \ + unsigned long addr; \ + \ + while (__flush_pages > 0) { \ + if (!system_supports_tlb_range() || \ + __flush_pages == 1 || \ + (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \ + addr = __TLBI_VADDR(__flush_start, asid); \ + __tlbi_level(op, addr, tlb_level); \ + if (tlbi_user) \ + __tlbi_user_level(op, addr, tlb_level); \ + __flush_start += stride; \ + __flush_pages -= stride >> PAGE_SHIFT; \ + continue; \ + } \ + \ + num = __TLBI_RANGE_NUM(__flush_pages, scale); \ + if (num >= 0) { \ + addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \ + scale, num, tlb_level); \ + __tlbi(r##op, addr); \ + if (tlbi_user) \ + __tlbi_user(r##op, addr); \ + __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\ + } \ + scale--; \ + } \ +} while (0) + +#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled()); + +static inline bool __flush_tlb_range_limit_excess(unsigned long start, + unsigned long end, unsigned long pages, unsigned long stride) +{ + /* + * When the system does not support TLB range based flush + * operation, (MAX_DVM_OPS - 1) pages can be handled. But + * with TLB range based operation, MAX_TLBI_RANGE_PAGES + * pages can be handled. + */ + if ((!system_supports_tlb_range() && + (end - start) >= (MAX_DVM_OPS * stride)) || + pages > MAX_TLBI_RANGE_PAGES) + return true; + + return false; +} + +static inline void __flush_tlb_range_nosync(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long stride, bool last_level, int tlb_level) { - int num = 0; - int scale = 0; - unsigned long asid = ASID(vma->vm_mm); - unsigned long addr; - unsigned long pages; + unsigned long asid, pages; start = round_down(start, stride); end = round_up(end, stride); pages = (end - start) >> PAGE_SHIFT; - /* - * When not uses TLB range ops, we can handle up to - * (MAX_TLBI_OPS - 1) pages; - * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. - */ - if ((!system_supports_tlb_range() && - (end - start) >= (MAX_TLBI_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { - flush_tlb_mm(vma->vm_mm); + if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { + flush_tlb_mm(mm); return; } dsb(ishst); + asid = ASID(mm); - /* - * When the CPU does not support TLB range operations, flush the TLB - * entries one by one at the granularity of 'stride'. If the the TLB - * range ops are supported, then: - * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; - * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. - */ - while (pages > 0) { - if (!system_supports_tlb_range() || - pages % 2 == 1) { - addr = __TLBI_VADDR(start, asid); - if (last_level) { - __tlbi_level(vale1is, addr, tlb_level); - __tlbi_user_level(vale1is, addr, tlb_level); - } else { - __tlbi_level(vae1is, addr, tlb_level); - __tlbi_user_level(vae1is, addr, tlb_level); - } - start += stride; - pages -= stride >> PAGE_SHIFT; - continue; - } - - num = __TLBI_RANGE_NUM(pages, scale); - if (num >= 0) { - addr = __TLBI_VADDR_RANGE(start, asid, scale, - num, tlb_level); - if (last_level) { - __tlbi(rvale1is, addr); - __tlbi_user(rvale1is, addr); - } else { - __tlbi(rvae1is, addr); - __tlbi_user(rvae1is, addr); - } - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; - pages -= __TLBI_RANGE_PAGES(num, scale); - } - scale++; - } + if (last_level) + __flush_tlb_range_op(vale1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); + else + __flush_tlb_range_op(vae1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); + + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); +} + +static inline void __flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long stride, bool last_level, + int tlb_level) +{ + __flush_tlb_range_nosync(vma->vm_mm, start, end, stride, + last_level, tlb_level); dsb(ish); } +static inline void local_flush_tlb_contpte(struct vm_area_struct *vma, + unsigned long addr) +{ + unsigned long asid; + + addr = round_down(addr, CONT_PTE_SIZE); + + dsb(nshst); + asid = ASID(vma->vm_mm); + __flush_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid, + 3, true, lpa2_is_enabled()); + mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr, + addr + CONT_PTE_SIZE); + dsb(nsh); +} + static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { /* * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. - * Set the tlb_level to 0 because we can not get enough information here. + * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough + * information here. */ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long addr; + const unsigned long stride = PAGE_SIZE; + unsigned long pages; + + start = round_down(start, stride); + end = round_up(end, stride); + pages = (end - start) >> PAGE_SHIFT; - if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { + if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { flush_tlb_all(); return; } - start = __TLBI_VADDR(start, 0); - end = __TLBI_VADDR(end, 0); - dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaale1is, addr); + __flush_tlb_range_op(vaale1is, start, pages, stride, 0, + TLBI_TTL_UNKNOWN, false, lpa2_is_enabled()); dsb(ish); isb(); } @@ -402,6 +574,39 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) dsb(ish); isb(); } + +static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, unsigned long start, unsigned long end) +{ + __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3); +} + +static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval) +{ + ptdesc_t diff = oldval ^ newval; + + /* invalid to valid transition requires no flush */ + if (!(oldval & PTE_VALID)) + return false; + + /* Transition in the SW bits requires no flush */ + diff &= ~PTE_SWBITS_MASK; + + return diff; +} + +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte)); +} +#define pte_needs_flush pte_needs_flush + +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd)); +} +#define huge_pmd_needs_flush huge_pmd_needs_flush + #endif #endif |
