From f81a348728ec5ac43f3bbcf81c97d52baba253f7 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 21 Nov 2017 11:59:13 +0000
Subject: arm64: mm: cleanup stale AIVIVT references

Since commit:

  155433cb365ee466 ("arm64: cache: Remove support for ASID-tagged VIVT I-caches")

... the kernel no longer cares about AIVIVT I-caches, as these were
removed from the architecture.

This patch removes the stale references to such I-caches.

The comment in flush_context() is also updated to clarify when and where
the TLB invalidation occurs.

Signed-off-by: Mark Rutland
Cc: Catalin Marinas
Cc: Will Deacon
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/cacheflush.h | 2 +-
 arch/arm64/mm/context.c             | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 76d1cc85d5b1..955130762a3c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -38,7 +38,7 @@
  *
  *      See Documentation/cachetlb.txt for more information. Please note that
  *      the implementation assumes non-aliasing VIPT D-cache and (aliasing)
- *      VIPT or ASID-tagged VIVT I-cache.
+ *      VIPT I-cache.
  *
  *      flush_cache_mm(mm)
  *
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index ab9f5f0fb2c7..28a45a19aae7 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -117,7 +117,10 @@ static void flush_context(unsigned int cpu)
                 per_cpu(reserved_asids, i) = asid;
         }

-        /* Queue a TLB invalidate and flush the I-cache if necessary. */
+        /*
+         * Queue a TLB invalidation for each CPU to perform on next
+         * context-switch
+         */
         cpumask_setall(&tlb_flush_pending);
 }

--
cgit

From 7e8b9c1d2e2f5f45db7d40b50d14f606097c25de Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Mon, 20 Nov 2017 17:41:29 +0000
Subject: arm64: module-plts: factor out PLT generation code for ftrace

To allow the ftrace trampoline code to reuse the PLT entry routines,
factor it out and move it into asm/module.h.

Cc:
Signed-off-by: Ard Biesheuvel
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/module.h | 44 +++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/module-plts.c | 38 ++--------------------------------
 2 files changed, 46 insertions(+), 36 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 19bd97671bb8..11d4aaee82e1 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -45,4 +45,48 @@ extern u64 module_alloc_base;
 #define module_alloc_base ((u64)_etext - MODULES_VSIZE)
 #endif

+struct plt_entry {
+        /*
+         * A program that conforms to the AArch64 Procedure Call Standard
+         * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+         * IP1 (x17) may be inserted at any branch instruction that is
+         * exposed to a relocation that supports long branches. Since that
+         * is exactly what we are dealing with here, we are free to use x16
+         * as a scratch register in the PLT veneers.
+         */
+        __le32 mov0;    /* movn x16, #0x....            */
+        __le32 mov1;    /* movk x16, #0x...., lsl #16   */
+        __le32 mov2;    /* movk x16, #0x...., lsl #32   */
+        __le32 br;      /* br   x16                     */
+};
+
+static inline struct plt_entry get_plt_entry(u64 val)
+{
+        /*
+         * MOVK/MOVN/MOVZ opcode:
+         * +--------+------------+--------+-----------+-------------+---------+
+         * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+         * +--------+------------+--------+-----------+-------------+---------+
+         *
+         * Rd  := 0x10 (x16)
+         * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+         * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+         * sf  := 1 (64-bit variant)
+         */
+        return (struct plt_entry){
+                cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+                cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+                cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+                cpu_to_le32(0xd61f0200)
+        };
+}
+
+static inline bool plt_entries_equal(const struct plt_entry *a,
+                                     const struct plt_entry *b)
+{
+        return a->mov0 == b->mov0 &&
+               a->mov1 == b->mov1 &&
+               a->mov2 == b->mov2;
+}
+
 #endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index d05dbe658409..ebff6c155cac 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -11,21 +11,6 @@
 #include
 #include

-struct plt_entry {
-        /*
-         * A program that conforms to the AArch64 Procedure Call Standard
-         * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
-         * IP1 (x17) may be inserted at any branch instruction that is
-         * exposed to a relocation that supports long branches. Since that
-         * is exactly what we are dealing with here, we are free to use x16
-         * as a scratch register in the PLT veneers.
-         */
-        __le32 mov0;    /* movn x16, #0x....            */
-        __le32 mov1;    /* movk x16, #0x...., lsl #16   */
-        __le32 mov2;    /* movk x16, #0x...., lsl #32   */
-        __le32 br;      /* br   x16                     */
-};
-
 static bool in_init(const struct module *mod, void *loc)
 {
         return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
         int i = pltsec->plt_num_entries;
         u64 val = sym->st_value + rela->r_addend;

-        /*
-         * MOVK/MOVN/MOVZ opcode:
-         * +--------+------------+--------+-----------+-------------+---------+
-         * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-         * +--------+------------+--------+-----------+-------------+---------+
-         *
-         * Rd  := 0x10 (x16)
-         * hw  := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-         * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-         * sf  := 1 (64-bit variant)
-         */
-        plt[i] = (struct plt_entry){
-                cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-                cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-                cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-                cpu_to_le32(0xd61f0200)
-        };
+        plt[i] = get_plt_entry(val);

         /*
          * Check if the entry we just created is a duplicate. Given that the
          * relocations are sorted, this will be the last entry we allocated.
          * (if one exists).
          */
-        if (i > 0 &&
-            plt[i].mov0 == plt[i - 1].mov0 &&
-            plt[i].mov1 == plt[i - 1].mov1 &&
-            plt[i].mov2 == plt[i - 1].mov2)
+        if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
                 return (u64)&plt[i - 1];

         pltsec->plt_num_entries++;
--
cgit

From be0f272bfc83797f70d44faca86954df62e2bbc0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Mon, 20 Nov 2017 17:41:30 +0000
Subject: arm64: ftrace: emit ftrace-mod.o contents through code

When building the arm64 kernel with both CONFIG_ARM64_MODULE_PLTS and
CONFIG_DYNAMIC_FTRACE enabled, the ftrace-mod.o object file is built
with the kernel and contains a trampoline that is linked into each
module, so that modules can be loaded far away from the kernel and still
reach the ftrace entry point in the core kernel with an ordinary relative
branch, as is emitted by the compiler instrumentation code dynamic
ftrace relies on.

In order to be able to build out of tree modules, this object file
needs to be included into the linux-headers or linux-devel packages,
which is undesirable, as it makes arm64 a special case (although a
precedent does exist for 32-bit PPC).

Given that the trampoline essentially consists of a PLT entry, let's
not bother with a source or object file for it, and simply patch it in
whenever the trampoline is being populated, using the existing PLT
support routines.

Cc:
Signed-off-by: Ard Biesheuvel
Signed-off-by: Will Deacon
---
 arch/arm64/Makefile             |  3 ---
 arch/arm64/include/asm/module.h |  2 +-
 arch/arm64/kernel/Makefile      |  3 ---
 arch/arm64/kernel/ftrace-mod.S  | 18 ------------------
 arch/arm64/kernel/ftrace.c      | 14 ++++++++------
 arch/arm64/kernel/module-plts.c | 12 ++++++++++++
 arch/arm64/kernel/module.lds    |  1 +
 7 files changed, 22 insertions(+), 31 deletions(-)
 delete mode 100644 arch/arm64/kernel/ftrace-mod.S

(limited to 'arch')

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b35788c909f1..b481b4a7c011 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -83,9 +83,6 @@ endif

 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
-ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
-KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
-endif
 endif

 # Default value
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 11d4aaee82e1..4f766178fa6f 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -32,7 +32,7 @@ struct mod_arch_specific {
         struct mod_plt_sec init;

         /* for CONFIG_DYNAMIC_FTRACE */
-        void *ftrace_trampoline;
+        struct plt_entry *ftrace_trampoline;
 };
 #endif

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 8265dd790895..067baace74a0 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds
 ifeq ($(CONFIG_DEBUG_EFI),y)
 AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
 endif
-
-# will be included by each individual module but not by the core kernel itself
-extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S
deleted file mode 100644
index 00c4025be4ff..000000000000
--- a/arch/arm64/kernel/ftrace-mod.S
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2017 Linaro Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */ - -#include -#include - - .section ".text.ftrace_trampoline", "ax" - .align 3 -0: .quad 0 -__ftrace_trampoline: - ldr x16, 0b - br x16 -ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c13b1fca0e5b..50986e388d2b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - unsigned long *trampoline; + struct plt_entry trampoline; struct module *mod; /* @@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. */ - trampoline = (unsigned long *)mod->arch.ftrace_trampoline; - if (trampoline[0] != addr) { - if (trampoline[0] != 0) { + trampoline = get_plt_entry(addr); + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &trampoline)) { + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &(struct plt_entry){})) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } /* point the trampoline to our ftrace entry point */ module_disable_ro(mod); - trampoline[0] = addr; + *mod->arch.ftrace_trampoline = trampoline; module_enable_ro(mod, true); /* update trampoline before patching in the branch */ smp_wmb(); } - addr = (unsigned long)&trampoline[1]; + addr = (unsigned long)(void *)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index ebff6c155cac..ea640f92fe5a 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -120,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned long core_plts = 0; unsigned long init_plts = 0; Elf64_Sym *syms = NULL; + Elf_Shdr *tramp = NULL; int i; /* @@ -131,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt = sechdrs + i; + else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(secstrings + sechdrs[i].sh_name, + ".text.ftrace_trampoline")) + tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } @@ -181,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt_num_entries = 0; mod->arch.init.plt_max_entries = init_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index f7c9781a9d48..22e36a21c113 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,4 +1,5 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } .init.plt (NOLOAD) : { BYTE(0) } + .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } } -- cgit From a349b30250634da20950eb91e2551dcd81f1805d Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Wed, 22 Nov 2017 21:43:59 +0900 Subject: arm64: pgd: Mark pgd_cache as __ro_after_init pgd_cache is setup once while init stage and never changed after that, so it is good candidate for __ro_after_init Signed-off-by: Jinbum Park Signed-off-by: Will Deacon --- 
 arch/arm64/mm/pgd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 371c5f03a170..051e71ec3335 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -26,7 +26,7 @@
 #include
 #include

-static struct kmem_cache *pgd_cache;
+static struct kmem_cache *pgd_cache __ro_after_init;

 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
--
cgit

From 9de52a755cfb6da5ee21a07e3a868bdc8fbfccb3 Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Thu, 30 Nov 2017 11:56:37 +0000
Subject: arm64: fpsimd: Fix failure to restore FPSIMD state after signals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fpsimd_update_current_state() function is responsible for loading
the FPSIMD state from the user signal frame into the current task during
sigreturn. When implementing support for SVE, conditional code was added
to this function in order to handle the case where SVE state needs to be
loaded for the task and merged with the FPSIMD data from the signal
frame; however, the FPSIMD-only case was unintentionally dropped.

As a result of this, sigreturn does not currently restore the FPSIMD
state of the task, except in the case where the system supports SVE and
the signal frame contains SVE state in addition to FPSIMD state.

This patch fixes this bug by making the copy-in of the FPSIMD data from
the signal frame to thread_struct unconditional.

This remains a performance regression from v4.14, since the FPSIMD state
is now copied into thread_struct and then loaded back, instead of _only_
being loaded into the CPU FPSIMD registers. However, it is essential to
call task_fpsimd_load() here anyway in order to ensure that the SVE
enable bit in CPACR_EL1 is set correctly before returning to userspace.

This could use some refactoring, but since sigreturn is not a fast path
I have kept this patch as a pure fix and left the refactoring for later.

Cc: Catalin Marinas
Cc: Ard Biesheuvel
Fixes: 8cd969d28fd2 ("arm64/sve: Signal handling support")
Reported-by: Alex Bennée
Tested-by: Alex Bennée
Reviewed-by: Alex Bennée
Signed-off-by: Dave Martin
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/fpsimd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 143b3e72c25e..5084e699447a 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state)

         local_bh_disable();

-        if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
-                current->thread.fpsimd_state = *state;
+        current->thread.fpsimd_state = *state;
+        if (system_supports_sve() && test_thread_flag(TIF_SVE))
                 fpsimd_to_sve(current);
-        }
+
         task_fpsimd_load();

         if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
--
cgit

From f8ada189550984ee21f27be736042b74a7da1d68 Mon Sep 17 00:00:00 2001
From: Xu YiPing
Date: Wed, 15 Nov 2017 15:39:26 +0800
Subject: arm64: perf: remove unsupported events for Cortex-A73

Bus access read/write events are not supported in A73, based on the
Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460).

Fixes: 5561b6c5e981 "arm64: perf: add support for Cortex-A73"
Acked-by: Julien Thierry
Signed-off-by: Xu YiPing
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/perf_event.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9eaef51f83ff..3affca3dd96a 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]

         [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
         [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
-        [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-        [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
-        [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]  = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-        [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };

 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
--
cgit

From 770ba06084f7aeadea120922c775d574f3128ba3 Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Wed, 29 Nov 2017 17:03:03 +0300
Subject: arm64: cpu_ops: Add missing 'const' qualifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building the kernel with an LTO-enabled GCC spits out the following
"const" warning for the cpu_ops code:

  mm/percpu.c:2168:20: error: pcpu_fc_names causes a section type
  conflict with dt_supported_cpu_ops
   const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = {
                      ^
  arch/arm64/kernel/cpu_ops.c:34:37: note: ‘dt_supported_cpu_ops’ was
  declared here
   static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {

Fix it by adding missed const qualifiers.

Signed-off-by: Yury Norov
Reviewed-by: Nick Desaulniers
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/cpu_ops.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index d16978213c5b..ea001241bdd4 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;

 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;

-static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
         &smp_spin_table_ops,
         &cpu_psci_ops,
         NULL,
 };

-static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
         &acpi_parking_protocol_ops,
 #endif
@@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {

 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-        const struct cpu_operations **ops;
+        const struct cpu_operations *const *ops;

         ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;

--
cgit

From 3a33c7605750fb6a87613044d16b1455e482414d Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 30 Nov 2017 18:25:17 +0000
Subject: arm64: context: Fix comments and remove pointless smp_wmb()

The comments in the ASID allocator incorrectly hint at an MP-style idiom
using the asid_generation and the active_asids array.
In fact, the synchronisation is achieved using a combination of an xchg
operation and a spinlock, so update the comments and remove the pointless
smp_wmb().

Cc: James Morse
Signed-off-by: Will Deacon
---
 arch/arm64/mm/context.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 28a45a19aae7..6f4017046323 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu)

         set_reserved_asid_bits();

-        /*
-         * Ensure the generation bump is observed before we xchg the
-         * active_asids.
-         */
-        smp_wmb();
-
         for_each_possible_cpu(i) {
                 asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
                 /*
@@ -205,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
         asid = atomic64_read(&mm->context.id);

         /*
-         * The memory ordering here is subtle. We rely on the control
-         * dependency between the generation read and the update of
-         * active_asids to ensure that we are synchronised with a
-         * parallel rollover (i.e. this pairs with the smp_wmb() in
-         * flush_context).
+         * The memory ordering here is subtle.
+         * If our ASID matches the current generation, then we update
+         * our active_asids entry with a relaxed xchg. Racing with a
+         * concurrent rollover means that either:
+         *
+         * - We get a zero back from the xchg and end up waiting on the
+         *   lock. Taking the lock synchronises with the rollover and so
+         *   we are forced to see the updated generation.
+         *
+         * - We get a valid ASID back from the xchg, which means the
+         *   relaxed xchg in flush_context will treat us as reserved
+         *   because atomic RmWs are totally ordered for a given location.
          */
         if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
             atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
--
cgit
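
A note for readers studying the two PLT patches above: the veneer built by
get_plt_entry() can be reproduced and inspected entirely in user space. The
sketch below is a hypothetical, standalone rendition of that helper; the
opcode constants and the imm16 field position come straight from the kernel
code quoted above, but the names make_plt_entry() and main(), the use of plain
uint32_t instead of __le32 (so cpu_to_le32() byte-swapping is assumed away on
a little-endian host), and the example target address are illustrative only.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * User-space sketch of the kernel's get_plt_entry(): build the
 * movn/movk/movk/br veneer that loads a target address into x16.
 * Opcode bases match the kernel source: 0x92800010 (movn x16),
 * 0xf2a00010 (movk x16, lsl #16), 0xf2c00010 (movk x16, lsl #32),
 * 0xd61f0200 (br x16); imm16 sits in bits [20:5] of each move.
 * movn writes the NOT of its immediate, so bits [63:48] of x16 end
 * up all-ones, which matches arm64 kernel/module virtual addresses.
 */
struct plt_entry {
        uint32_t mov0;  /* movn x16, #(~target & 0xffff)        */
        uint32_t mov1;  /* movk x16, #target[31:16], lsl #16    */
        uint32_t mov2;  /* movk x16, #target[47:32], lsl #32    */
        uint32_t br;    /* br   x16                             */
};

static struct plt_entry make_plt_entry(uint64_t val)
{
        struct plt_entry e = {
                .mov0 = 0x92800010u | (uint32_t)(((~val      ) & 0xffff) << 5),
                .mov1 = 0xf2a00010u | (uint32_t)((( val >> 16) & 0xffff) << 5),
                .mov2 = 0xf2c00010u | (uint32_t)((( val >> 32) & 0xffff) << 5),
                .br   = 0xd61f0200u,
        };
        return e;
}

int main(void)
{
        /* Arbitrary example address in the arm64 kernel VA range. */
        uint64_t target = 0xffff000008081234ull;
        struct plt_entry e = make_plt_entry(target);

        printf("movn: 0x%08" PRIx32 "\n", e.mov0);
        printf("movk: 0x%08" PRIx32 "\n", e.mov1);
        printf("movk: 0x%08" PRIx32 "\n", e.mov2);
        printf("br:   0x%08" PRIx32 "\n", e.br);
        return 0;
}

Compiling and running this prints the four instruction words; feeding them to
an AArch64 disassembler shows the movn/movk/movk/br sequence that the module
PLTs and the ftrace trampoline now share.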