From 7e71b85473f863a29eb1c69265ef025389b4091d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean
Date: Tue, 8 Jun 2021 14:26:58 +0300
Subject: arm64: dts: ls1028a: fix node name for the sysclk

U-Boot attempts to fix up the "clock-frequency" property of the
"/sysclk" node:
https://elixir.bootlin.com/u-boot/v2021.04/source/arch/arm/cpu/armv8/fsl-layerscape/fdt.c#L512
but fails to do so:

## Booting kernel from Legacy Image at a1000000 ...
   Image Name:
   Created:      2021-06-08 10:31:38 UTC
   Image Type:   AArch64 Linux Kernel Image (gzip compressed)
   Data Size:    15431370 Bytes = 14.7 MiB
   Load Address: 80080000
   Entry Point:  80080000
   Verifying Checksum ... OK
## Flattened Device Tree blob at a0000000
   Booting using the fdt blob at 0xa0000000
   Uncompressing Kernel Image
   Loading Device Tree to 00000000fbb19000, end 00000000fbb22717 ... OK
Unable to update property /sysclk:clock-frequency, err=FDT_ERR_NOTFOUND

Starting kernel ...

All Layerscape SoCs except LS1028A use "sysclk" as the node name, and
not "clock-sysclk". So change the node name of LS1028A accordingly.

Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC")
Signed-off-by: Vladimir Oltean
Signed-off-by: Shawn Guo
---
 arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index b2e3e5d2a108..343ecf0e8973 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -66,7 +66,7 @@
 		};
 	};
 
-	sysclk: clock-sysclk {
+	sysclk: sysclk {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
 		clock-frequency = <100000000>;
--
cgit

From 80d9ac9bd7b9366c2a89d2716a397749299728e7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Tue, 13 Jul 2021 12:36:41 +0100
Subject: KVM: arm64: Fix detection of shared VMAs on guest fault

When merging the KVM MTE support, the blob that was interposed between
the chair and the keyboard experienced a neuronal accident (also known
as a brain fart), turning a check for VM_SHARED into VM_PFNMAP as it
was reshuffling some of the code. The blob having now come back to its
senses, let's restore the initial check that the original author got
right in the first place.

Fixes: ea7fc1bb1cd1 ("KVM: arm64: Introduce MTE VM feature")
Reviewed-by: Steven Price
Signed-off-by: Marc Zyngier
Cc: Catalin Marinas
Link: https://lore.kernel.org/r/20210713114804.594993-1-maz@kernel.org
---
 arch/arm64/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3155c9e778f0..0625bf2353c2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -947,7 +947,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		vma_shift = get_vma_page_shift(vma, hva);
 	}
 
-	shared = (vma->vm_flags & VM_PFNMAP);
+	shared = (vma->vm_flags & VM_SHARED);
 
 	switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
--
cgit

From ba02920c51debb9198e72b3a8726a7c5ae4ffb41 Mon Sep 17 00:00:00 2001
From: Vidya Sagar
Date: Tue, 13 Jul 2021 17:05:46 +0530
Subject: arm64: tegra: Enable SMMU support for PCIe on Tegra194

As of commit c7289b1c8a4e ("arm64: tegra: Enable SMMU support on
Tegra194"), SMMU support is enabled system-wide on Tegra194. However,
there was a bit of overlap between the SMMU enablement and the PCIe
support addition, so the PCIe device tree nodes are missing the iommus
and interconnects properties.
This in turn leads to SMMU faults for these devices, since by default the ARM SMMU will fault. Add the iommus and interconnects properties to all the PCIe device tree nodes to restore their functionality. Fixes: c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194") Signed-off-by: Vidya Sagar Reviewed-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 60 ++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 6 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 076d5efc4c3d..5ba7a4519b95 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1840,7 +1840,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE1R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE1W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE1>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE1 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14120000 { @@ -1890,7 +1894,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE2AR &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE2AW &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE2>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE2 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14140000 { @@ -1940,7 +1948,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE3R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE3W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE3>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE3 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14160000 { @@ -1990,7 +2002,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14180000 { @@ -2040,7 +2056,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@141a0000 { @@ -2094,7 +2114,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14160000 { @@ -2127,6 +2151,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14180000 { @@ -2159,6 +2191,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, + <&mc 
TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@141a0000 { @@ -2194,6 +2234,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; sram@40000000 { -- cgit From 29f6a20c21b5bdc7eb623a712bbf7b99612ee746 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 2 Jul 2021 21:49:14 +0200 Subject: arm64: dts: ls1028: sl28: fix networking for variant 2 The PHY configuration for the variant 2 is still missing the flag for in-band signalling between PHY and MAC. Both sides - MAC and PHY - have to match the setting. For now, Linux only supports setting the MAC side and thus it has to match the setting the bootloader is configuring. Enable in-band signalling to make ethernet work. Fixes: ab43f0307449 ("arm64: dts: ls1028a: sl28: add support for variant 2") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts index dd764b720fb0..f6a79c8080d1 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts @@ -54,6 +54,7 @@ &mscc_felix_port0 { label = "swp0"; + managed = "in-band-status"; phy-handle = <&phy0>; phy-mode = "sgmii"; status = "okay"; @@ -61,6 +62,7 @@ &mscc_felix_port1 { label = "swp1"; + managed = "in-band-status"; phy-handle = <&phy1>; phy-mode = "sgmii"; status = "okay"; -- cgit From 923f98929182dfd04e9149be839160b63a3db145 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 13:11:34 +0300 Subject: arm64: dts: armada-3720-turris-mox: fixed indices for the SDHC controllers Since drivers/mmc/host/sdhci-xenon.c declares the PROBE_PREFER_ASYNCHRONOUS probe type, it is not guaranteed whether /dev/mmcblk0 will belong to sdhci0 or sdhci1. In turn, this will break booting by: root=/dev/mmcblk0p1 Fix the issue by adding aliases so that the old MMC controller indices are preserved. 
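For background, the MMC core honours these aliases roughly as sketched
below (a simplified sketch modelled on mmc_alloc_host() in
drivers/mmc/core/host.c, not the verbatim code; mmc_host_ida is the
core's internal index allocator):

  #include <linux/device.h>
  #include <linux/gfp.h>
  #include <linux/idr.h>
  #include <linux/of.h>

  /* Sketch: how the MMC core picks a host index. */
  static int mmc_pick_host_index(struct device *dev)
  {
  	/* An "mmcN" DT alias pins the index: mmc0 -> /dev/mmcblk0. */
  	int index = of_alias_get_id(dev->of_node, "mmc");

  	if (index >= 0)
  		return ida_simple_get(&mmc_host_ida, index, index + 1,
  				      GFP_KERNEL);

  	/*
  	 * No alias: whichever controller probes first grabs the lowest
  	 * free index, which is racy with PROBE_PREFER_ASYNCHRONOUS.
  	 */
  	return ida_simple_get(&mmc_host_ida, 0, 0, GFP_KERNEL);
  }

With the aliases in place, sdhci0 keeps index 0 regardless of probe
order, so root=/dev/mmcblk0p1 keeps working.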
Fixes: 7320915c8861 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.14")
Signed-off-by: Vladimir Oltean
Signed-off-by: Gregory CLEMENT
---
 arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm64')

diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
index ce2bcddf396f..f2d7d6f071bc 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
@@ -19,6 +19,8 @@
 	aliases {
 		spi0 = &spi0;
 		ethernet1 = &eth1;
+		mmc0 = &sdhci0;
+		mmc1 = &sdhci1;
 	};
 
 	chosen {
--
cgit

From ee7ab3f263f8131722cff3871b9618b1e7478f07 Mon Sep 17 00:00:00 2001
From: Pali Rohár
Date: Mon, 28 Jun 2021 17:12:29 +0200
Subject: arm64: dts: armada-3720-turris-mox: remove mrvl,i2c-fast-mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some SFP modules are not detected when i2c-fast-mode is enabled, even
when clock-frequency is already set to 100000. The I2C bus violates
the timing specifications when run in fast mode. So disable fast mode
on Turris Mox.

The same change was already applied for uDPU (also an Armada 3720
board with SFP) in commit fe3ec631a77d ("arm64: dts: uDPU: remove
i2c-fast-mode").

Fixes: 7109d817db2e ("arm64: dts: marvell: add DTS for Turris Mox")
Signed-off-by: Pali Rohár
Reviewed-by: Marek Behún
Acked-by: Russell King (Oracle)
Signed-off-by: Gregory CLEMENT
---
 arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm64')

diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
index f2d7d6f071bc..a05b1ab2dd12 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
@@ -121,6 +121,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c1_pins>;
 	clock-frequency = <100000>;
+	/delete-property/ mrvl,i2c-fast-mode;
 	status = "okay";
 
 	rtc@6f {
--
cgit

From f5d156c7bfab7d728b2fd35bc63eab12eda18125 Mon Sep 17 00:00:00 2001
From: Joakim Zhang
Date: Mon, 19 Jul 2021 15:34:37 +0800
Subject: arm64: dts: imx8mp: remove fallback compatible string for FlexCAN

FlexCAN on i.MX8MP is not derived from i.MX6Q; it instead reuses the
i.MX8QM implementation, with an extra ECC block added and enabled by
default, so the FlexCAN core would be put into freeze mode without the
FLEXCAN_QUIRK_DISABLE_MECR quirk. This patch removes the
"fsl,imx6q-flexcan" fallback compatible string since the block is not
compatible with the i.MX6Q.
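To see why the stale fallback matters, consider how the driver's OF
match table selects per-SoC quirks (a hedged sketch modelled on
drivers/net/can/flexcan.c; the devtype data is heavily simplified and
the quirk value shown is illustrative):

  #include <linux/bits.h>
  #include <linux/mod_devicetable.h>
  #include <linux/types.h>

  #define FLEXCAN_QUIRK_DISABLE_MECR	BIT(4)	/* disable memory error detection */

  struct flexcan_devtype_data {
  	u32 quirks;
  };

  static const struct flexcan_devtype_data fsl_imx8mp_devtype_data = {
  	.quirks = FLEXCAN_QUIRK_DISABLE_MECR,	/* ECC present, must be handled */
  };

  static const struct flexcan_devtype_data fsl_imx6q_devtype_data = {
  	.quirks = 0,				/* no ECC block on i.MX6Q */
  };

  static const struct of_device_id flexcan_of_match[] = {
  	/* Most specific compatible first. */
  	{ .compatible = "fsl,imx8mp-flexcan", .data = &fsl_imx8mp_devtype_data },
  	{ .compatible = "fsl,imx6q-flexcan",  .data = &fsl_imx6q_devtype_data },
  	{ /* sentinel */ },
  };

With the fallback in place, software that only knows
"fsl,imx6q-flexcan" would silently bind with the i.MX6Q quirks, leave
the ECC enabled and end up with the controller stuck in freeze mode;
dropping the fallback turns that into an explicit probe failure
instead.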
Link: https://lore.kernel.org/r/20210719073437.32078-1-qiangqing.zhang@nxp.com Signed-off-by: Joakim Zhang Reviewed-by: Fabio Estevam Signed-off-by: Marc Kleine-Budde --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ca38d0d6c3c4..f4eaab3ecf03 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -579,7 +579,7 @@ }; flexcan1: can@308c0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308c0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, @@ -594,7 +594,7 @@ }; flexcan2: can@308d0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308d0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, -- cgit From f5e81d1117501546b7be050c5fbafa6efd2c722c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 13 Jul 2021 08:18:31 +0000 Subject: bpf: Introduce BPF nospec instruction for mitigating Spectre v4 In case of JITs, each of the JIT backends compiles the BPF nospec instruction /either/ to a machine instruction which emits a speculation barrier /or/ to /no/ machine instruction in case the underlying architecture is not affected by Speculative Store Bypass or has different mitigations in place already. This covers both x86 and (implicitly) arm64: In case of x86, we use 'lfence' instruction for mitigation. In case of arm64, we rely on the firmware mitigation as controlled via the ssbd kernel parameter. Whenever the mitigation is enabled, it works for all of the kernel code with no need to provide any additional instructions here (hence only comment in arm64 JIT). Other archs can follow as needed. The BPF nospec instruction is specifically targeting Spectre v4 since i) we don't use a serialization barrier for the Spectre v1 case, and ii) mitigation instructions for v1 and v4 might be different on some archs. The BPF nospec is required for a future commit, where the BPF verifier does annotate intermediate BPF programs with speculation barriers. Co-developed-by: Piotr Krysiuk Co-developed-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Signed-off-by: Piotr Krysiuk Signed-off-by: Benedict Schlueter Acked-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dccf98a37283..41c23f474ea6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -823,6 +823,19 @@ emit_cond_jmp: return ret; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. 
+	 */
+		break;
+
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_W:
 	case BPF_ST | BPF_MEM | BPF_H:
--
cgit

From facee1be7689f8cf573b9ffee6a5c28ee193615e Mon Sep 17 00:00:00 2001
From: David Brazdil
Date: Wed, 28 Jul 2021 15:32:31 +0000
Subject: KVM: arm64: Fix off-by-one in range_is_memory

Hyp checks whether an address range only covers RAM by checking the
start/endpoints against a list of memblock_region structs. However,
the endpoint here is exclusive but internally is treated as inclusive.
Fix the off-by-one error that caused valid address ranges to be
rejected.

Cc: Quentin Perret
Fixes: 90134ac9cabb6 ("KVM: arm64: Protect the .hyp sections from the host")
Signed-off-by: David Brazdil
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210728153232.1018911-2-dbrazdil@google.com
---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index d938ce95d3bd..a6ce991b1467 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -193,7 +193,7 @@ static bool range_is_memory(u64 start, u64 end)
 {
 	struct kvm_mem_range r1, r2;
 
-	if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2))
+	if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2))
 		return false;
 	if (r1.start != r2.start)
 		return false;
--
cgit

From c4d7c51845af9542d42cd18a25c570583abf2768 Mon Sep 17 00:00:00 2001
From: Steven Price
Date: Thu, 29 Jul 2021 17:00:36 +0100
Subject: KVM: arm64: Fix race when enabling KVM_ARM_CAP_MTE

When enabling KVM_CAP_ARM_MTE the ioctl checks that there are no VCPUs
created, to ensure that the capability is enabled before the VM is
running. However, no locks are held at that point, so it is
(theoretically) possible for another thread in the VMM to create VCPUs
between the check and actually setting mte_enabled. Close the race by
taking kvm->lock.

Reported-by: Alexandru Elisei
Fixes: 673638f434ee ("KVM: arm64: Expose KVM_ARM_CAP_MTE")
Signed-off-by: Steven Price
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210729160036.20433-1-steven.price@arm.com
---
 arch/arm64/kvm/arm.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e9a2b8f27792..0ca72f5cda41 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -94,10 +94,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		kvm->arch.return_nisv_io_abort_to_user = true;
 		break;
 	case KVM_CAP_ARM_MTE:
-		if (!system_supports_mte() || kvm->created_vcpus)
-			return -EINVAL;
-		r = 0;
-		kvm->arch.mte_enabled = true;
+		mutex_lock(&kvm->lock);
+		if (!system_supports_mte() || kvm->created_vcpus) {
+			r = -EINVAL;
+		} else {
+			r = 0;
+			kvm->arch.mte_enabled = true;
+		}
+		mutex_unlock(&kvm->lock);
 		break;
 	default:
 		r = -EINVAL;
--
cgit

From e30e8d46cf605d216a799a28c77b8a41c328613a Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Mon, 2 Aug 2021 11:42:00 +0100
Subject: arm64: fix compat syscall return truncation

Due to inconsistencies in the way we manipulate compat GPRs, we have a
few issues today:

* For audit and tracing, where error codes are handled as a (native)
  long, negative error codes are expected to be sign-extended to the
  native 64-bits, or they may fail to be matched correctly. Thus a
  syscall which fails with an error may erroneously be identified as
  succeeding.
* For ptrace, *all* compat return values should be sign-extended for
  consistency with 32-bit arm, but we currently only do this for
  negative return codes.

* As we may transiently set the upper 32 bits of some compat GPRs while
  in the kernel, these can be sampled by perf, which is somewhat
  confusing. This means that where a syscall returns a pointer above
  2G, this will be sign-extended, but will not be mistaken for an error
  as error codes are constrained to the inclusive range [-4096, -1]
  where no user pointer can exist.

To fix all of these, we must consistently use helpers to get/set the
compat GPRs, ensuring that we never write the upper 32 bits of the
return code, and always sign-extend when reading the return code. This
patch does so, with the following changes:

* We re-organise syscall_get_return_value() to always sign-extend for
  compat tasks, and reimplement syscall_get_error() atop. We update
  syscall_trace_exit() to use syscall_get_return_value().

* We consistently use syscall_set_return_value() to set the return
  value, ensuring the upper 32 bits are never set unexpectedly.

* As the core audit code currently uses regs_return_value() rather
  than syscall_get_return_value(), we special-case this for
  compat_user_mode(regs) such that this will do the right thing. Going
  forward, we should try to move the core audit code over to
  syscall_get_return_value().

Cc:
Reported-by: He Zhe
Reported-by: weiyuchen
Cc: Catalin Marinas
Cc: Will Deacon
Reviewed-by: Catalin Marinas
Link: https://lore.kernel.org/r/20210802104200.21390-1-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/ptrace.h  | 12 +++++++++++-
 arch/arm64/include/asm/syscall.h | 19 ++++++++++---------
 arch/arm64/kernel/ptrace.c       |  2 +-
 arch/arm64/kernel/signal.c       |  3 ++-
 arch/arm64/kernel/syscall.c      |  9 +++------
 5 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index e58bca832dff..41b332c054ab 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -320,7 +320,17 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 
 static inline unsigned long regs_return_value(struct pt_regs *regs)
 {
-	return regs->regs[0];
+	unsigned long val = regs->regs[0];
+
+	/*
+	 * Audit currently uses regs_return_value() instead of
+	 * syscall_get_return_value(). Apply the same sign-extension here until
+	 * audit is updated to use syscall_get_return_value().
+	 */
+	if (compat_user_mode(regs))
+		val = sign_extend64(val, 31);
+
+	return val;
 }
 
 static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index cfc0672013f6..03e20895453a 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -29,22 +29,23 @@ static inline void syscall_rollback(struct task_struct *task,
 	regs->regs[0] = regs->orig_x0;
 }
 
-
-static inline long syscall_get_error(struct task_struct *task,
-				     struct pt_regs *regs)
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
 {
-	unsigned long error = regs->regs[0];
+	unsigned long val = regs->regs[0];
 
 	if (is_compat_thread(task_thread_info(task)))
-		error = sign_extend64(error, 31);
+		val = sign_extend64(val, 31);
 
-	return IS_ERR_VALUE(error) ?
error : 0; + return val; } -static inline long syscall_get_return_value(struct task_struct *task, - struct pt_regs *regs) +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) { - return regs->regs[0]; + unsigned long error = syscall_get_return_value(task, regs); + + return IS_ERR_VALUE(error) ? error : 0; } static inline void syscall_set_return_value(struct task_struct *task, diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 499b6b2f9757..b381a1ee9ea7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1862,7 +1862,7 @@ void syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (flags & _TIF_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, regs_return_value(regs)); + trace_sys_exit(regs, syscall_get_return_value(current, regs)); if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f8192f4ae0b8..23036334f4dc 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -890,7 +891,7 @@ static void do_signal(struct pt_regs *regs) retval == -ERESTART_RESTARTBLOCK || (retval == -ERESTARTSYS && !(ksig.ka.sa.sa_flags & SA_RESTART)))) { - regs->regs[0] = -EINTR; + syscall_set_return_value(current, regs, -EINTR, 0); regs->pc = continue_addr; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 263d6c1a525f..50a0f1a38e84 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -54,10 +54,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } - if (is_compat_task()) - ret = lower_32_bits(ret); - - regs->regs[0] = ret; + syscall_set_return_value(current, regs, 0, ret); /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), @@ -115,7 +112,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * syscall. do_notify_resume() will send a signal to userspace * before the syscall is restarted. */ - regs->regs[0] = -ERESTARTNOINTR; + syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0); return; } @@ -136,7 +133,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * anyway. */ if (scno == NO_SYSCALL) - regs->regs[0] = -ENOSYS; + syscall_set_return_value(current, regs, -ENOSYS, 0); scno = syscall_trace_enter(regs); if (scno == NO_SYSCALL) goto trace_exit; -- cgit From 64ee84c75b5f75132eec97f2c7a201a056d53698 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 1 Aug 2021 14:35:25 +0900 Subject: arm64: move warning about toolchains to archprepare Commit 987fdfec2410 ("arm64: move --fix-cortex-a53-843419 linker test to Kconfig") fixed the false-positive warning in the installation step. Yet, there are some cases where this false-positive is shown. For example, you can see it when you cross 987fdfec2410 during git-bisect. $ git checkout 987fdfec2410^ [ snip ] $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- defconfig all [ snip ] $ git checkout v5.13 [ snip] $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- defconfig all [ snip ] arch/arm64/Makefile:25: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum In the stale include/config/auto.config, CONFIG_ARM64_ERRATUM_843419=y is set without CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419, so the warning is displayed while parsing the Makefiles. 
Make will restart with the updated include/config/auto.config, hence CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419 will be set eventually, but this warning is a surprise for users. Commit 25896d073d8a ("x86/build: Fix compiler support check for CONFIG_RETPOLINE") addressed a similar issue. Move $(warning ...) out of the parse stage of Makefiles. The same applies to CONFIG_ARM64_USE_LSE_ATOMICS. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210801053525.105235-1-masahiroy@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7bc37d0a1b68..7b668db43261 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -21,19 +21,11 @@ LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) - ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) -$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) - else + ifeq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) - ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) -$(warning LSE atomics not supported by binutils) - endif -endif - cc_has_k_constraint := $(call try-run,echo \ 'int main(void) { \ asm volatile("and w0, w0, %w0" :: "K" (4294967295)); \ @@ -176,6 +168,17 @@ vdso_install: archprepare: $(Q)$(MAKE) $(build)=arch/arm64/tools kapi +ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) + ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) + @echo "warning: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum" >&2 + endif +endif +ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS),y) + ifneq ($(CONFIG_ARM64_LSE_ATOMICS),y) + @echo "warning: LSE atomics not supported by binutils" >&2 + endif +endif + # We use MRPROPER_FILES and CLEAN_FILES now archclean: -- cgit From f9c4ff2ab9fe433d44ebbc2e3c2368a49df44798 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 31 Jul 2021 00:51:31 +1200 Subject: arm64: fix the doc of RANDOMIZE_MODULE_REGION_FULL Obviously kaslr is setting the module region to 2GB rather than 4GB since commit b2eed9b588112 ("arm64/kernel: kaslr: reduce module randomization range to 2 GB"). So fix the size of region in Kconfig. On the other hand, even though RANDOMIZE_MODULE_REGION_FULL is not set, module_alloc() can fall back to a 2GB window if ARM64_MODULE_PLTS is set. In this case, veneers are still needed. !RANDOMIZE_MODULE_REGION_FULL doesn't necessarily mean veneers are not needed. So fix the doc to be more precise to avoid any confusion to the readers of the code. Cc: Masami Hiramatsu Cc: Ard Biesheuvel Cc: Qi Liu Signed-off-by: Barry Song Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210730125131.13724-1-song.bao.hua@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 9 ++++++--- arch/arm64/kernel/kaslr.c | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b5b13a932561..fdcd54d39c1e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1800,11 +1800,11 @@ config RANDOMIZE_BASE If unsure, say N. 
config RANDOMIZE_MODULE_REGION_FULL - bool "Randomize the module region over a 4 GB range" + bool "Randomize the module region over a 2 GB range" depends on RANDOMIZE_BASE default y help - Randomizes the location of the module region inside a 4 GB window + Randomizes the location of the module region inside a 2 GB window covering the core kernel. This way, it is less likely for modules to leak information about the location of core kernel data structures but it does imply that function calls between modules and the core @@ -1812,7 +1812,10 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the - core kernel, so branch relocations are always in range. + core kernel, so branch relocations are almost always in range unless + ARM64_MODULE_PLTS is enabled and the region is exhausted. In this + particular case of region exhaustion, modules might be able to fall + back to a larger 2GB area. config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index cfa2cfde3019..418b2bba1521 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -162,7 +162,9 @@ u64 __init kaslr_early_init(void) * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, * _stext) . This guarantees that the resulting region still * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers. + * be resolved without veneers unless this region is exhausted + * and we fall back to a larger 2GB window in module_alloc() + * when ARM64_MODULE_PLTS is enabled. */ module_range = MODULES_VSIZE - (u64)(_etext - _stext); module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; -- cgit From 8d5903f457145e3fcd858578b065d667822d99ac Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Aug 2021 17:48:44 +0100 Subject: arm64: stacktrace: fix comment Due to a copy-paste error, we describe struct stackframe::pc as a snapshot of the `fp` field rather than the `lr` field. Fix the comment. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210802164845.45506-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 1801399204d7..8aebc00c1718 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -35,7 +35,7 @@ struct stack_info { * accounting information necessary for robust unwinding. * * @fp: The fp value in the frame record (or the real fp) - * @pc: The fp value in the frame record (or the real lr) + * @pc: The lr value in the frame record (or the real lr) * * @stacks_done: Stacks which have been entirely unwound, for which it is no * longer valid to unwind to. 
--
cgit

From 0c32706dac1b0a72713184246952ab0f54327c21 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Mon, 2 Aug 2021 17:48:45 +0100
Subject: arm64: stacktrace: avoid tracing arch_stack_walk()

When the function_graph tracer is in use, arch_stack_walk() may unwind
the stack incorrectly, erroneously reporting itself, missing the final
entry which is being traced, and reporting all traced entries between
these off-by-one from where they should be.

When ftrace hooks a function return, the original return address is
saved to the fgraph ret_stack, and the return address in the LR (or
the function's frame record) is replaced with `return_to_handler`.

When arm64's unwinder encounters frames returning to
`return_to_handler`, it finds the associated original return address
from the fgraph ret stack, assuming the most recent
`return_to_handler` entry on the stack corresponds to the most recent
entry in the fgraph ret stack, and so on.

When arch_stack_walk() is used to dump the current task's stack, it
starts from the caller of arch_stack_walk(). However, arch_stack_walk()
can be traced, and so may push an entry onto the fgraph ret stack,
leaving the fgraph ret stack offset by one from the expected position.

This can be seen when dumping the stack via /proc/self/stack, where
enabling the graph tracer results in an unexpected
`stack_trace_save_tsk` entry at the start of the trace, and `el0_svc`
missing from the end of the trace.

This patch fixes this by marking arch_stack_walk() as notrace, as we
do for all other functions on the path to
ftrace_graph_get_ret_stack(). While a few helper functions are not
marked notrace, their calls/returns are balanced, and will have no
observable effect when examining the fgraph ret stack.

It is possible for an exception boundary to cause a similar offset if
the return address of the interrupted context was in the LR. Fixing
those cases will require some more substantial rework, and is left for
subsequent patches.
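For reference, the fgraph fix-up the unwinder performs looks roughly
like the sketch below (modelled on unwind_frame() in
arch/arm64/kernel/stacktrace.c; simplified, e.g. the pointer-auth
stripping of frame->pc is omitted). frame->graph indexes the ret_stack
in lockstep with the return_to_handler frames seen on the stack, which
is where the off-by-one creeps in:

  if (tsk->ret_stack &&
      frame->pc == (unsigned long)return_to_handler) {
  	struct ftrace_ret_stack *ret_stack;

  	/*
  	 * The i-th return_to_handler frame found on the stack is
  	 * assumed to pair with the i-th entry on the fgraph
  	 * ret_stack. If arch_stack_walk() itself is traced, it pushes
  	 * one extra ret_stack entry, so every lookup below is shifted
  	 * by one.
  	 */
  	ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
  	if (WARN_ON_ONCE(!ret_stack))
  		return -EINVAL;
  	frame->pc = ret_stack->ret;
  }

This is what produces the traces below: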
Before:

| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c
| # echo function_graph > /sys/kernel/tracing/current_tracer
| # cat /proc/self/stack
| [<0>] stack_trace_save_tsk+0xa4/0x110
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c

After:

| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c
| # echo function_graph > /sys/kernel/tracing/current_tracer
| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c

Cc:
Signed-off-by: Mark Rutland
Cc: Catalin Marinas
Cc: Madhavan T. Venkataraman
Cc: Mark Brown
Cc: Will Deacon
Reviewed-by: Mark Brown
Link: https://lore.kernel.org/r/20210802164845.45506-3-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/stacktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b83c8d911930..8982a2b78acf 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -218,7 +218,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
 
 #ifdef CONFIG_STACKTRACE
 
-noinline void arch_stack_walk(stack_trace_consume_fn consume_entry,
+noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
 			      void *cookie, struct task_struct *task,
 			      struct pt_regs *regs)
 {
--
cgit

From d82158fa6df4237c9859b27d719c53b4fe09e69e Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 30 Jul 2021 12:24:37 +0100
Subject: arm64: Implement task_cpu_possible_mask()

Provide an implementation of task_cpu_possible_mask() so that we can
prevent 64-bit-only cores being added to the 'cpus_mask' for compat
tasks on systems with mismatched 32-bit support at EL0.

Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Catalin Marinas
Link: https://lore.kernel.org/r/20210730112443.23245-11-will@kernel.org
---
 arch/arm64/include/asm/mmu_context.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index eeb210997149..f4ba93d4ffeb 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -231,6 +231,19 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	update_saved_ttbr0(tsk, next);
 }
 
+static inline const struct cpumask *
+task_cpu_possible_mask(struct task_struct *p)
+{
+	if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
+		return cpu_possible_mask;
+
+	if (!is_compat_thread(task_thread_info(p)))
+		return cpu_possible_mask;
+
+	return system_32bit_el0_cpumask();
+}
+#define task_cpu_possible_mask task_cpu_possible_mask
+
 void verify_cpu_asid_bits(void);
 void post_ttbr_update_workaround(void);
 
--
cgit

From 08cd8f4112dbd33bbfe1112dd6e9f0a228a8e132 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 30 Jul 2021 12:24:38 +0100
Subject: arm64: exec: Adjust affinity for compat tasks with mismatched 32-bit EL0

When exec'ing a 32-bit task on a system with mismatched support for
32-bit EL0, try to ensure that it starts life on a CPU that can
actually run it. Similarly, when exec'ing a 64-bit task on such a
system, try to restore the old affinity mask if it was previously
restricted.

Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Daniel Bristot de Oliveira
Reviewed-by: Quentin Perret
Link: https://lore.kernel.org/r/20210730112443.23245-12-will@kernel.org
---
 arch/arm64/include/asm/elf.h |  6 ++----
 arch/arm64/kernel/process.c  | 39 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 8d1c8dcb87fd..97932fbf973d 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -213,10 +213,8 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
 
 /* AArch32 EABI.
*/ #define EF_ARM_EABI_MASK 0xff000000 -#define compat_elf_check_arch(x) (system_supports_32bit_el0() && \ - ((x)->e_machine == EM_ARM) && \ - ((x)->e_flags & EF_ARM_EABI_MASK)) - +int compat_elf_check_arch(const struct elf32_hdr *); +#define compat_elf_check_arch compat_elf_check_arch #define compat_start_thread compat_start_thread /* * Unlike the native SET_PERSONALITY macro, the compat version maintains diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c8989b999250..583ee58f8c9c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -579,6 +580,28 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } +#ifdef CONFIG_COMPAT +int compat_elf_check_arch(const struct elf32_hdr *hdr) +{ + if (!system_supports_32bit_el0()) + return false; + + if ((hdr)->e_machine != EM_ARM) + return false; + + if (!((hdr)->e_flags & EF_ARM_EABI_MASK)) + return false; + + /* + * Prevent execve() of a 32-bit program from a deadline task + * if the restricted affinity mask would be inadmissible on an + * asymmetric system. + */ + return !static_branch_unlikely(&arm64_mismatched_32bit_el0) || + !dl_task_check_affinity(current, system_32bit_el0_cpumask()); +} +#endif + /* * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY. */ @@ -588,8 +611,22 @@ void arch_setup_new_exec(void) if (is_compat_task()) { mmflags = MMCF_AARCH32; - if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) + + /* + * Restrict the CPU affinity mask for a 32-bit task so that + * it contains only 32-bit-capable CPUs. + * + * From the perspective of the task, this looks similar to + * what would happen if the 64-bit-only CPUs were hot-unplugged + * at the point of execve(), although we try a bit harder to + * honour the cpuset hierarchy. + */ + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) { + force_compatible_cpus_allowed_ptr(current); set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); + } + } else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) { + relax_compatible_cpus_allowed_ptr(current); } current->mm->context.flags = mmflags; -- cgit From df950811f4a884d08f05e22a8e0089d54bb4ba70 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:24:39 +0100 Subject: arm64: Prevent offlining first CPU with 32-bit EL0 on mismatched system If we want to support 32-bit applications, then when we identify a CPU with mismatched 32-bit EL0 support we must ensure that we will always have an active 32-bit CPU available to us from then on. This is important for the scheduler, because is_cpu_allowed() will be constrained to 32-bit CPUs for compat tasks and forced migration due to a hotplug event will hang if no 32-bit CPUs are available. On detecting a mismatch, prevent offlining of either the mismatching CPU if it is 32-bit capable, or find the first active 32-bit capable CPU otherwise. 
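For context, the scheduler-side check being protected here looks
roughly like the sketch below (a simplified rendering of
is_cpu_allowed() in kernel/sched/core.c after this series; migration
disabling, kthread and hotplug special cases are omitted, and
task_cpu_possible() is assumed to wrap task_cpu_possible_mask()):

  /* Sketch: can @p run on @cpu? */
  static bool is_cpu_allowed(struct task_struct *p, int cpu)
  {
  	/* Affinity mask requested by the user/scheduler. */
  	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
  		return false;

  	/* A compat task must land on a 32-bit-capable CPU. */
  	if (!task_cpu_possible(cpu, p))
  		return false;

  	return cpu_online(cpu);
  }

If every 32-bit-capable CPU could be offlined, the second test would
fail for all CPUs and a forced migration would have nowhere to go,
which is the hang described above.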
Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210730112443.23245-13-will@kernel.org --- arch/arm64/kernel/cpufeature.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0ead8bfedf20..0af584549499 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2901,15 +2901,38 @@ void __init setup_cpu_features(void) static int enable_mismatched_32bit_el0(unsigned int cpu) { + /* + * The first 32-bit-capable CPU we detected and so can no longer + * be offlined by userspace. -1 indicates we haven't yet onlined + * a 32-bit-capable CPU. + */ + static int lucky_winner = -1; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0); if (cpu_32bit) { cpumask_set_cpu(cpu, cpu_32bit_el0_mask); static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0); - setup_elf_hwcaps(compat_elf_hwcaps); } + if (cpumask_test_cpu(0, cpu_32bit_el0_mask) == cpu_32bit) + return 0; + + if (lucky_winner >= 0) + return 0; + + /* + * We've detected a mismatch. We need to keep one of our CPUs with + * 32-bit EL0 online so that is_cpu_allowed() doesn't end up rejecting + * every CPU in the system for a 32-bit task. + */ + lucky_winner = cpu_32bit ? cpu : cpumask_any_and(cpu_32bit_el0_mask, + cpu_active_mask); + get_cpu_device(lucky_winner)->offline_disabled = true; + setup_elf_hwcaps(compat_elf_hwcaps); + pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n", + cpu, lucky_winner); return 0; } -- cgit From 7af33504d1c8077b40121294b5eb6e680a859468 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:24:40 +0100 Subject: arm64: Advertise CPUs capable of running 32-bit applications in sysfs Since 32-bit applications will be killed if they are caught trying to execute on a 64-bit-only CPU in a mismatched system, advertise the set of 32-bit capable CPUs to userspace in sysfs. 
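As a usage note (a hypothetical userspace example, not part of the
patch): the new attribute appears as
/sys/devices/system/cpu/aarch32_el0 and can be read like any other
sysfs file, e.g.:

  #include <stdio.h>

  int main(void)
  {
  	char buf[64];
  	FILE *f = fopen("/sys/devices/system/cpu/aarch32_el0", "r");

  	if (!f) {
  		/* The attribute only exists once a mismatch is detected. */
  		perror("aarch32_el0");
  		return 1;
  	}
  	if (fgets(buf, sizeof(buf), f))
  		printf("32-bit-capable CPUs: %s", buf);
  	fclose(f);
  	return 0;
  }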
Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Greg Kroah-Hartman Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210730112443.23245-14-will@kernel.org --- arch/arm64/kernel/cpufeature.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0af584549499..0a9bc7eff26e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -1321,6 +1322,24 @@ const struct cpumask *system_32bit_el0_cpumask(void) return cpu_possible_mask; } +static ssize_t aarch32_el0_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct cpumask *mask = system_32bit_el0_cpumask(); + + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(mask)); +} +static const DEVICE_ATTR_RO(aarch32_el0); + +static int __init aarch32_el0_sysfs_init(void) +{ + if (!allow_mismatched_32bit_el0) + return 0; + + return device_create_file(cpu_subsys.dev_root, &dev_attr_aarch32_el0); +} +device_initcall(aarch32_el0_sysfs_init); + static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope) { if (!has_cpuid_feature(entry, scope)) -- cgit From ead7de462ae56b2d2e56fb5a414f6e916dddc4c9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:24:41 +0100 Subject: arm64: Hook up cmdline parameter to allow mismatched 32-bit EL0 Allow systems with mismatched 32-bit support at EL0 to run 32-bit applications based on a new kernel parameter. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210730112443.23245-15-will@kernel.org --- arch/arm64/kernel/cpufeature.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0a9bc7eff26e..77b2149c26b8 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1322,6 +1322,13 @@ const struct cpumask *system_32bit_el0_cpumask(void) return cpu_possible_mask; } +static int __init parse_32bit_el0_param(char *str) +{ + allow_mismatched_32bit_el0 = true; + return 0; +} +early_param("allow_mismatched_32bit_el0", parse_32bit_el0_param); + static ssize_t aarch32_el0_show(struct device *dev, struct device_attribute *attr, char *buf) { -- cgit From 94f9c00f6460c0b175226c8fe6fd137547b239bd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:24:42 +0100 Subject: arm64: Remove logic to kill 32-bit tasks on 64-bit-only cores The scheduler now knows enough about these braindead systems to place 32-bit tasks accordingly, so throw out the safety checks and allow the ret-to-user path to avoid do_notify_resume() if there is nothing to do. 
Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210730112443.23245-16-will@kernel.org --- arch/arm64/kernel/process.c | 14 +------------- arch/arm64/kernel/signal.c | 26 -------------------------- 2 files changed, 1 insertion(+), 39 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 583ee58f8c9c..e0e7f4e9b607 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -469,15 +469,6 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, write_sysreg(val, cntkctl_el1); } -static void compat_thread_switch(struct task_struct *next) -{ - if (!is_compat_thread(task_thread_info(next))) - return; - - if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) - set_tsk_thread_flag(next, TIF_NOTIFY_RESUME); -} - static void update_sctlr_el1(u64 sctlr) { /* @@ -519,7 +510,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, ssbs_thread_switch(next); erratum_1418040_thread_switch(prev, next); ptrauth_thread_switch_user(next); - compat_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -621,10 +611,8 @@ void arch_setup_new_exec(void) * at the point of execve(), although we try a bit harder to * honour the cpuset hierarchy. */ - if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) { + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) force_compatible_cpus_allowed_ptr(current); - set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); - } } else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) { relax_compatible_cpus_allowed_ptr(current); } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 23036334f4dc..22899c86711a 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -912,19 +912,6 @@ static void do_signal(struct pt_regs *regs) restore_saved_sigmask(); } -static bool cpu_affinity_invalid(struct pt_regs *regs) -{ - if (!compat_user_mode(regs)) - return false; - - /* - * We're preemptible, but a reschedule will cause us to check the - * affinity again. - */ - return !cpumask_test_cpu(raw_smp_processor_id(), - system_32bit_el0_cpumask()); -} - asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { @@ -952,19 +939,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); - - /* - * If we reschedule after checking the affinity - * then we must ensure that TIF_NOTIFY_RESUME - * is set so that we check the affinity again. - * Since tracehook_notify_resume() clears the - * flag, ensure that the compiler doesn't move - * it after the affinity check. - */ - barrier(); - - if (cpu_affinity_invalid(regs)) - force_sig(SIGKILL); } if (thread_flags & _TIF_FOREIGN_FPSTATE) -- cgit