From 19bef63f951e47dd4ba54810e6f7c7ff9344a3ef Mon Sep 17 00:00:00 2001
From: Prakruthi Deepak Heragu
Date: Fri, 13 May 2022 10:46:54 -0700
Subject: arm64: paravirt: Use RCU read locks to guard stolen_time

During hotplug, the stolen time data structure is unmapped and memset.
There is a possibility of the timer IRQ being triggered before memset
and stolen time is getting updated as part of this timer IRQ handler.
This causes the below crash in timer handler -

  [ 3457.473139][ C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
  ...
  [ 3458.154398][ C5] Call trace:
  [ 3458.157648][ C5]  para_steal_clock+0x30/0x50
  [ 3458.162319][ C5]  irqtime_account_process_tick+0x30/0x194
  [ 3458.168148][ C5]  account_process_tick+0x3c/0x280
  [ 3458.173274][ C5]  update_process_times+0x5c/0xf4
  [ 3458.178311][ C5]  tick_sched_timer+0x180/0x384
  [ 3458.183164][ C5]  __run_hrtimer+0x160/0x57c
  [ 3458.187744][ C5]  hrtimer_interrupt+0x258/0x684
  [ 3458.192698][ C5]  arch_timer_handler_virt+0x5c/0xa0
  [ 3458.198002][ C5]  handle_percpu_devid_irq+0xdc/0x414
  [ 3458.203385][ C5]  handle_domain_irq+0xa8/0x168
  [ 3458.208241][ C5]  gic_handle_irq.34493+0x54/0x244
  [ 3458.213359][ C5]  call_on_irq_stack+0x40/0x70
  [ 3458.218125][ C5]  do_interrupt_handler+0x60/0x9c
  [ 3458.223156][ C5]  el1_interrupt+0x34/0x64
  [ 3458.227560][ C5]  el1h_64_irq_handler+0x1c/0x2c
  [ 3458.232503][ C5]  el1h_64_irq+0x7c/0x80
  [ 3458.236736][ C5]  free_vmap_area_noflush+0x108/0x39c
  [ 3458.242126][ C5]  remove_vm_area+0xbc/0x118
  [ 3458.246714][ C5]  vm_remove_mappings+0x48/0x2a4
  [ 3458.251656][ C5]  __vunmap+0x154/0x278
  [ 3458.255796][ C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
  [ 3458.261542][ C5]  cpuhp_invoke_callback+0x248/0xc34
  [ 3458.266842][ C5]  cpuhp_thread_fun+0x1c4/0x248
  [ 3458.271696][ C5]  smpboot_thread_fn+0x1b0/0x400
  [ 3458.276638][ C5]  kthread+0x17c/0x1e0
  [ 3458.280691][ C5]  ret_from_fork+0x10/0x20

As a fix, introduce rcu lock to update stolen time structure.

Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
Cc: stable@vger.kernel.org
Suggested-by: Will Deacon
Signed-off-by: Prakruthi Deepak Heragu
Signed-off-by: Elliot Berman
Reviewed-by: Srivatsa S. Bhat (VMware)
Link: https://lore.kernel.org/r/20220513174654.362169-1-quic_eberman@quicinc.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/paravirt.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75fed4460407..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
-        struct pvclock_vcpu_stolen_time *kaddr;
+        struct pvclock_vcpu_stolen_time __rcu *kaddr;
 };
 
 static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
 /* return stolen time in ns by asking the hypervisor */
 static u64 para_steal_clock(int cpu)
 {
+        struct pvclock_vcpu_stolen_time *kaddr = NULL;
         struct pv_time_stolen_time_region *reg;
+        u64 ret = 0;
 
         reg = per_cpu_ptr(&stolen_time_region, cpu);
 
@@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu)
          * online notification callback runs. Until the callback
          * has run we just return zero.
          */
-        if (!reg->kaddr)
+        rcu_read_lock();
+        kaddr = rcu_dereference(reg->kaddr);
+        if (!kaddr) {
+                rcu_read_unlock();
                 return 0;
+        }
 
-        return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+        ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+        rcu_read_unlock();
+        return ret;
 }
 
 static int stolen_time_cpu_down_prepare(unsigned int cpu)
 {
+        struct pvclock_vcpu_stolen_time *kaddr = NULL;
         struct pv_time_stolen_time_region *reg;
 
         reg = this_cpu_ptr(&stolen_time_region);
         if (!reg->kaddr)
                 return 0;
 
-        memunmap(reg->kaddr);
-        memset(reg, 0, sizeof(*reg));
+        kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
+        synchronize_rcu();
+        memunmap(kaddr);
 
         return 0;
 }
 
 static int stolen_time_cpu_online(unsigned int cpu)
 {
+        struct pvclock_vcpu_stolen_time *kaddr = NULL;
         struct pv_time_stolen_time_region *reg;
         struct arm_smccc_res res;
 
@@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu)
         if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
                 return -EINVAL;
 
-        reg->kaddr = memremap(res.a0,
+        kaddr = memremap(res.a0,
                         sizeof(struct pvclock_vcpu_stolen_time),
                         MEMREMAP_WB);
 
+        rcu_assign_pointer(reg->kaddr, kaddr);
+
         if (!reg->kaddr) {
                 pr_warn("Failed to map stolen time data structure\n");
                 return -ENOMEM;
         }
 
-        if (le32_to_cpu(reg->kaddr->revision) != 0 ||
-            le32_to_cpu(reg->kaddr->attributes) != 0) {
+        if (le32_to_cpu(kaddr->revision) != 0 ||
+            le32_to_cpu(kaddr->attributes) != 0) {
                 pr_warn_once("Unexpected revision or attributes in stolen time data\n");
                 return -ENXIO;
         }
-- cgit
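The patch above follows the standard RCU publish/retire pattern for a pointer that
may be read from IRQ context. A minimal kernel-style sketch of that pattern
(illustrative only, not code from the patch; sample_region, sample_ptr and the two
functions are made-up names):

#include <linux/compiler.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/types.h>

struct sample_region {
        u64 value;
};

static struct sample_region __rcu *sample_ptr;

/* Reader: may run in IRQ context, so it must tolerate concurrent teardown. */
static u64 sample_read(void)
{
        struct sample_region *p;
        u64 val = 0;

        rcu_read_lock();
        p = rcu_dereference(sample_ptr);
        if (p)
                val = READ_ONCE(p->value);
        rcu_read_unlock();

        return val;
}

/* Teardown: unpublish the pointer, wait for readers, then free the object. */
static void sample_retire(void)
{
        struct sample_region *p;

        p = rcu_replace_pointer(sample_ptr, NULL, true);
        synchronize_rcu();      /* every reader that saw p has now finished */
        kfree(p);
}

The key point is that the free (memunmap() in the patch, kfree() in this sketch)
only happens after synchronize_rcu(), so a reader such as para_steal_clock() can
never dereference a mapping that has already been torn down.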
From eb3d8ea3e1f03f4b0b72d8f5ed9eb7c3165862e8 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Mon, 16 May 2022 17:07:35 +0100
Subject: arm64: kexec: load from kimage prior to clobbering

In arm64_relocate_new_kernel() we load some fields out of the kimage
structure after relocation has occurred. As the kimage structure isn't
allocated to be relocation-safe, it may be clobbered during relocation,
and we may load junk values out of the structure.

Due to this, kexec may fail when the kimage allocation happens to fall
within a PA range that an object will be relocated to. This has been
observed to occur for regular kexec on a QEMU TCG 'virt' machine with
2GiB of RAM, where the PA range of the new kernel image overlaps the
kimage structure.

Avoid this by ensuring we load all values from the kimage structure
prior to relocation.

I've tested this atop v5.16 and v5.18-rc6.

Fixes: 878fdbd70486 ("arm64: kexec: pass kimage as the only argument to relocation function")
Signed-off-by: Mark Rutland
Cc: Catalin Marinas
Cc: James Morse
Cc: Pasha Tatashin
Cc: Will Deacon
Reviewed-by: Pasha Tatashin
Link: https://lore.kernel.org/r/20220516160735.731404-1-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/relocate_kernel.S | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index f0a3df9e18a3..413f899e4ac6 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -37,6 +37,15 @@
  * safe memory that has been set up to be preserved during the copy operation.
  */
 SYM_CODE_START(arm64_relocate_new_kernel)
+        /*
+         * The kimage structure isn't allocated specially and may be clobbered
+         * during relocation. We must load any values we need from it prior to
+         * any relocation occurring.
+         */
+        ldr     x28, [x0, #KIMAGE_START]
+        ldr     x27, [x0, #KIMAGE_ARCH_EL2_VECTORS]
+        ldr     x26, [x0, #KIMAGE_ARCH_DTB_MEM]
+
         /* Setup the list loop variables. */
         ldr     x18, [x0, #KIMAGE_ARCH_ZERO_PAGE]       /* x18 = zero page for BBM */
         ldr     x17, [x0, #KIMAGE_ARCH_TTBR1]           /* x17 = linear map copy */
@@ -72,21 +81,20 @@ SYM_CODE_START(arm64_relocate_new_kernel)
         ic      iallu
         dsb     nsh
         isb
-        ldr     x4, [x0, #KIMAGE_START]                 /* relocation start */
-        ldr     x1, [x0, #KIMAGE_ARCH_EL2_VECTORS]      /* relocation start */
-        ldr     x0, [x0, #KIMAGE_ARCH_DTB_MEM]          /* dtb address */
         turn_off_mmu x12, x13
 
         /* Start new image. */
-        cbz     x1, .Lel1
-        mov     x1, x4                          /* relocation start */
-        mov     x2, x0                          /* dtb address */
+        cbz     x27, .Lel1
+        mov     x1, x28                         /* kernel entry point */
+        mov     x2, x26                         /* dtb address */
         mov     x3, xzr
         mov     x4, xzr
         mov     x0, #HVC_SOFT_RESTART
         hvc     #0                              /* Jumps from el2 */
 .Lel1:
+        mov     x0, x26                         /* dtb address */
+        mov     x1, xzr
         mov     x2, xzr
         mov     x3, xzr
-        br      x4                              /* Jumps from el1 */
+        br      x28                             /* Jumps from el1 */
 SYM_CODE_END(arm64_relocate_new_kernel)
-- cgit
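The fix above amounts to snapshotting every field that is still needed out of a
structure before a copy loop can overwrite it. A rough C analogy of the same idea
(illustrative only; boot_info, relocate_pages() and start_kernel_at() are made-up
stand-ins for the assembly in the real routine):

struct boot_info {
        unsigned long entry;            /* cf. KIMAGE_START */
        unsigned long el2_vectors;      /* cf. KIMAGE_ARCH_EL2_VECTORS */
        unsigned long dtb;              /* cf. KIMAGE_ARCH_DTB_MEM */
};

void relocate_pages(struct boot_info *info);
void start_kernel_at(unsigned long entry, unsigned long el2_vectors,
                     unsigned long dtb);

static void relocate_and_boot(struct boot_info *info)
{
        /*
         * info may itself sit inside a destination range of the copy, so read
         * out every field we will need before doing any relocation at all.
         */
        unsigned long entry = info->entry;
        unsigned long el2_vectors = info->el2_vectors;
        unsigned long dtb = info->dtb;

        relocate_pages(info);                   /* may clobber *info */
        start_kernel_at(entry, el2_vectors, dtb);  /* uses the saved copies */
}

In the assembly, the "locals" are the callee-visible registers x26-x28, loaded at
function entry and consumed only after the copy loop and MMU teardown.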
From 1d0cb4c8864addc362bae98e8ffa5500c87e1227 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Tue, 17 May 2022 10:35:32 +0100
Subject: arm64: mte: Ensure the cleared tags are visible before setting the PTE

As an optimisation, only pages mapped with PROT_MTE in user space have
the MTE tags zeroed. This is done lazily at the set_pte_at() time via
mte_sync_tags(). However, this function is missing a barrier and another
CPU may see the PTE updated before the zeroed tags are visible. Add an
smp_wmb() barrier if the mapping is Normal Tagged.

Signed-off-by: Catalin Marinas
Fixes: 34bfeea4a9e9 ("arm64: mte: Clear the tags when a page is mapped in user-space with PROT_MTE")
Cc: # 5.10.x
Reported-by: Vladimir Murzin
Cc: Will Deacon
Reviewed-by: Steven Price
Tested-by: Vladimir Murzin
Link: https://lore.kernel.org/r/20220517093532.127095-1-catalin.marinas@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/mte.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 78b3e0f8e997..d502703e8373 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -76,6 +76,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
                         mte_sync_page_tags(page, old_pte, check_swap,
                                            pte_is_tagged);
         }
+
+        /* ensure the tags are visible before the PTE is set */
+        smp_wmb();
 }
 
 int memcmp_pages(struct page *page1, struct page *page2)
-- cgit
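The barrier added above is the write half of the usual write/read barrier pairing:
the initialisation (zeroing the tags) must be ordered before the store that makes
the data reachable (setting the PTE), and the observer orders its accesses the
other way round. A minimal kernel-style sketch of that pairing (illustrative only,
not the MTE code itself; payload, published, producer() and consumer() are made-up
names, and in the real case the "read side" ordering comes from the page-table
walk rather than an explicit smp_rmb()):

#include <asm/barrier.h>
#include <linux/compiler.h>

static int payload;
static int published;

/* Producer: initialise the data, then publish it. */
static void producer(void)
{
        payload = 42;                   /* cf. zeroing the MTE tags */
        smp_wmb();                      /* order the init before the publish */
        WRITE_ONCE(published, 1);       /* cf. set_pte_at() exposing the page */
}

/* Consumer on another CPU: only read the data once it has been published. */
static int consumer(void)
{
        if (!READ_ONCE(published))
                return -1;
        smp_rmb();                      /* pairs with the producer's smp_wmb() */
        return payload;
}

Without the producer's smp_wmb(), the consumer could observe published == 1 while
still seeing the stale, uninitialised payload, which is exactly the window the
patch closes for the zeroed tags versus the newly set PTE.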