diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 14:47:13 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 14:47:13 -0800 |
commit | 2dc10ad81fc017837037e60439662e1b16bdffb9 (patch) | |
tree | fc2f77874339b2f79499e3b34dc5ecb496b68dfc /arch/arm64/mm/context.c | |
parent | e627078a0cbdc0c391efeb5a2c4eb287328fd633 (diff) | |
parent | f8f8bdc48851da979c6e0e4808b6031122e4af47 (diff) |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
- "genirq: Introduce generic irq migration for cpu hotunplugged" patch
merged from tip/irq/for-arm to allow the arm64-specific part to be
upstreamed via the arm64 tree
- CPU feature detection reworked to cope with heterogeneous systems
where CPUs may not have exactly the same features. The features
reported by the kernel via internal data structures or ELF_HWCAP are
delayed until all the CPUs are up (and before user space starts)
- Support for 16KB pages, with the additional bonus of a 36-bit VA
space, though the latter only depending on EXPERT
- Implement native {relaxed, acquire, release} atomics for arm64
- New ASID allocation algorithm which avoids IPI on roll-over, together
with TLB invalidation optimisations (using local vs global where
feasible)
- KASan support for arm64
- EFI_STUB clean-up and isolation for the kernel proper (required by
KASan)
- copy_{to,from,in}_user optimisations (sharing the memcpy template)
- perf: moving arm64 to the arm32/64 shared PMU framework
- L1_CACHE_BYTES increased to 128 to accommodate Cavium hardware
- Support for the contiguous PTE hint on kernel mapping (16 consecutive
entries may be able to use a single TLB entry)
- Generic CONFIG_HZ now used on arm64
- defconfig updates
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (91 commits)
arm64/efi: fix libstub build under CONFIG_MODVERSIONS
ARM64: Enable multi-core scheduler support by default
arm64/efi: move arm64 specific stub C code to libstub
arm64: page-align sections for DEBUG_RODATA
arm64: Fix build with CONFIG_ZONE_DMA=n
arm64: Fix compat register mappings
arm64: Increase the max granular size
arm64: remove bogus TASK_SIZE_64 check
arm64: make Timer Interrupt Frequency selectable
arm64/mm: use PAGE_ALIGNED instead of IS_ALIGNED
arm64: cachetype: fix definitions of ICACHEF_* flags
arm64: cpufeature: declare enable_cpu_capabilities as static
genirq: Make the cpuhotplug migration code less noisy
arm64: Constify hwcap name string arrays
arm64/kvm: Make use of the system wide safe values
arm64/debug: Make use of the system wide safe value
arm64: Move FP/ASIMD hwcap handling to common code
arm64/HWCAP: Use system wide safe values
arm64/capabilities: Make use of system wide safe value
arm64: Delay cpu feature capability checks
...
Diffstat (limited to 'arch/arm64/mm/context.c')
-rw-r--r-- | arch/arm64/mm/context.c | 236 |
1 files changed, 143 insertions, 93 deletions
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index d70ff14dbdbd..f636a2639f03 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -17,135 +17,185 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/init.h> +#include <linux/bitops.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/percpu.h> +#include <asm/cpufeature.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> -#include <asm/cachetype.h> -#define asid_bits(reg) \ - (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8) +static u32 asid_bits; +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS) +static atomic64_t asid_generation; +static unsigned long *asid_map; -static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; -/* - * We fork()ed a process, and we need a new context for the child to run in. - */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - mm->context.id = 0; - raw_spin_lock_init(&mm->context.id_lock); -} +#define ASID_MASK (~GENMASK(asid_bits - 1, 0)) +#define ASID_FIRST_VERSION (1UL << asid_bits) +#define NUM_USER_ASIDS ASID_FIRST_VERSION -static void flush_context(void) +static void flush_context(unsigned int cpu) { - /* set the reserved TTBR0 before flushing the TLB */ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - if (icache_is_aivivt()) - __flush_icache_all(); -} + int i; + u64 asid; -static void set_mm_context(struct mm_struct *mm, unsigned int asid) -{ - unsigned long flags; + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); /* - * Locking needed for multi-threaded applications where the same - * mm->context.id could be set from different CPUs during the - * broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. + * Ensure the generation bump is observed before we xchg the + * active_asids. */ - raw_spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { + smp_wmb(); + + for_each_possible_cpu(i) { + asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* - * Old version of ASID found. Set the new one and reset - * mm_cpumask(mm). + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. */ - mm->context.id = asid; - cpumask_clear(mm_cpumask(mm)); + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); + per_cpu(reserved_asids, i) = asid; } - raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); - /* - * Set the mm_cpumask(mm) bit for the current CPU. - */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + cpumask_setall(&tlb_flush_pending); + + if (icache_is_aivivt()) + __flush_icache_all(); } -/* - * Reset the ASID on the current CPU. This function call is broadcast from the - * CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +static int is_reserved_asid(u64 asid) +{ + int cpu; + for_each_possible_cpu(cpu) + if (per_cpu(reserved_asids, cpu) == asid) + return 1; + return 0; +} + +static u64 new_context(struct mm_struct *mm, unsigned int cpu) { - unsigned int asid; - unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = current->active_mm; + static u32 cur_idx = 1; + u64 asid = atomic64_read(&mm->context.id); + u64 generation = atomic64_read(&asid_generation); + + if (asid != 0) { + /* + * If our current ASID was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (is_reserved_asid(asid)) + return generation | (asid & ~ASID_MASK); + + /* + * We had a valid ASID in a previous life, so try to re-use + * it if possible. + */ + asid &= ~ASID_MASK; + if (!__test_and_set_bit(asid, asid_map)) + goto bump_gen; + } /* - * current->active_mm could be init_mm for the idle thread immediately - * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to - * the reserved value, so no need to reset any context. + * Allocate a free ASID. If we can't find one, take a note of the + * currently active ASIDs and mark the TLBs as requiring flushes. + * We always count from ASID #1, as we use ASID #0 when setting a + * reserved TTBR0 for the init_mm. */ - if (mm == &init_mm) - return; + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); + if (asid != NUM_USER_ASIDS) + goto set_asid; - smp_rmb(); - asid = cpu_last_asid + cpu; + /* We're out of ASIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); - flush_context(); - set_mm_context(mm, asid); + /* We have at least 1 ASID per CPU, so this will always succeed */ + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); - /* set the new ASID */ - cpu_switch_mm(mm->pgd, mm); +set_asid: + __set_bit(asid, asid_map); + cur_idx = asid; + +bump_gen: + asid |= generation; + return asid; } -void __new_context(struct mm_struct *mm) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) { - unsigned int asid; - unsigned int bits = asid_bits(); + unsigned long flags; + u64 asid; + + asid = atomic64_read(&mm->context.id); - raw_spin_lock(&cpu_asid_lock); /* - * Check the ASID again, in case the change was broadcast from another - * CPU before we acquired the lock. + * The memory ordering here is subtle. We rely on the control + * dependency between the generation read and the update of + * active_asids to ensure that we are synchronised with a + * parallel rollover (i.e. this pairs with the smp_wmb() in + * flush_context). */ - if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - raw_spin_unlock(&cpu_asid_lock); - return; + if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) + && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) + goto switch_mm_fastpath; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); } - /* - * At this point, it is guaranteed that the current mm (with an old - * ASID) isn't active on any other CPU since the ASIDs are changed - * simultaneously via IPI. - */ - asid = ++cpu_last_asid; - /* - * If we've used up all our ASIDs, we need to start a new version and - * flush the TLB. - */ - if (unlikely((asid & ((1 << bits) - 1)) == 0)) { - /* increment the ASID version */ - cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits); - if (cpu_last_asid == 0) - cpu_last_asid = ASID_FIRST_VERSION; - asid = cpu_last_asid + smp_processor_id(); - flush_context(); - smp_wmb(); - smp_call_function(reset_context, NULL, 1); - cpu_last_asid += NR_CPUS - 1; + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); + + atomic64_set(&per_cpu(active_asids, cpu), asid); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + +switch_mm_fastpath: + cpu_switch_mm(mm->pgd, mm); +} + +static int asids_init(void) +{ + int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4); + + switch (fld) { + default: + pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld); + /* Fallthrough */ + case 0: + asid_bits = 8; + break; + case 2: + asid_bits = 16; } - set_mm_context(mm, asid); - raw_spin_unlock(&cpu_asid_lock); + /* If we end up with more CPUs than ASIDs, expect things to crash */ + WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + atomic64_set(&asid_generation, ASID_FIRST_VERSION); + asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), + GFP_KERNEL); + if (!asid_map) + panic("Failed to allocate bitmap for %lu ASIDs\n", + NUM_USER_ASIDS); + + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); + return 0; } +early_initcall(asids_init); |