summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig34
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/boot/dts/microchip/sparx5.dtsi94
-rw-r--r--arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi481
-rw-r--r--arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi621
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308.dtsi22
-rw-r--r--arch/arm64/crypto/Makefile10
-rw-r--r--arch/arm64/crypto/poly1305-core.S_shipped835
-rw-r--r--arch/arm64/crypto/sha256-core.S_shipped2069
-rw-r--r--arch/arm64/crypto/sha512-core.S_shipped1093
-rw-r--r--arch/arm64/include/asm/alternative-macros.h9
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h3
-rw-r--r--arch/arm64/include/asm/asm-prototypes.h6
-rw-r--r--arch/arm64/include/asm/asm_pointer_auth.h49
-rw-r--r--arch/arm64/include/asm/assembler.h98
-rw-r--r--arch/arm64/include/asm/atomic.h2
-rw-r--r--arch/arm64/include/asm/cache.h2
-rw-r--r--arch/arm64/include/asm/cacheflush.h71
-rw-r--r--arch/arm64/include/asm/compiler.h16
-rw-r--r--arch/arm64/include/asm/cpu.h45
-rw-r--r--arch/arm64/include/asm/cpufeature.h15
-rw-r--r--arch/arm64/include/asm/cpuidle.h35
-rw-r--r--arch/arm64/include/asm/efi.h2
-rw-r--r--arch/arm64/include/asm/exception.h34
-rw-r--r--arch/arm64/include/asm/fpsimd.h2
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h4
-rw-r--r--arch/arm64/include/asm/insn-def.h9
-rw-r--r--arch/arm64/include/asm/insn.h67
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h19
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_asm.h1
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h3
-rw-r--r--arch/arm64/include/asm/kvm_host.h23
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h17
-rw-r--r--arch/arm64/include/asm/kvm_mte.h66
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h42
-rw-r--r--arch/arm64/include/asm/linkage.h8
-rw-r--r--arch/arm64/include/asm/memory.h22
-rw-r--r--arch/arm64/include/asm/mmu_context.h4
-rw-r--r--arch/arm64/include/asm/module.lds.h17
-rw-r--r--arch/arm64/include/asm/mte-def.h1
-rw-r--r--arch/arm64/include/asm/mte-kasan.h93
-rw-r--r--arch/arm64/include/asm/mte.h8
-rw-r--r--arch/arm64/include/asm/page.h10
-rw-r--r--arch/arm64/include/asm/patching.h13
-rw-r--r--arch/arm64/include/asm/perf_event.h5
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h7
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h1
-rw-r--r--arch/arm64/include/asm/pgtable.h27
-rw-r--r--arch/arm64/include/asm/pointer_auth.h59
-rw-r--r--arch/arm64/include/asm/preempt.h2
-rw-r--r--arch/arm64/include/asm/processor.h16
-rw-r--r--arch/arm64/include/asm/scs.h8
-rw-r--r--arch/arm64/include/asm/sdei.h10
-rw-r--r--arch/arm64/include/asm/smp.h2
-rw-r--r--arch/arm64/include/asm/stacktrace.h32
-rw-r--r--arch/arm64/include/asm/sysreg.h5
-rw-r--r--arch/arm64/include/asm/tlb.h4
-rw-r--r--arch/arm64/include/asm/unistd32.h3
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h11
-rw-r--r--arch/arm64/kernel/Makefile11
-rw-r--r--arch/arm64/kernel/acpi.c22
-rw-r--r--arch/arm64/kernel/alternative.c2
-rw-r--r--arch/arm64/kernel/asm-offsets.c17
-rw-r--r--arch/arm64/kernel/cpufeature.c210
-rw-r--r--arch/arm64/kernel/cpuinfo.c58
-rw-r--r--arch/arm64/kernel/efi-entry.S9
-rw-r--r--arch/arm64/kernel/entry-common.c256
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S22
-rw-r--r--arch/arm64/kernel/entry.S369
-rw-r--r--arch/arm64/kernel/fpsimd.c6
-rw-r--r--arch/arm64/kernel/ftrace.c1
-rw-r--r--arch/arm64/kernel/head.S76
-rw-r--r--arch/arm64/kernel/hibernate-asm.S7
-rw-r--r--arch/arm64/kernel/hibernate.c20
-rw-r--r--arch/arm64/kernel/idle.c46
-rw-r--r--arch/arm64/kernel/idreg-override.c3
-rw-r--r--arch/arm64/kernel/image-vars.h2
-rw-r--r--arch/arm64/kernel/jump_label.c1
-rw-r--r--arch/arm64/kernel/kaslr.c12
-rw-r--r--arch/arm64/kernel/kgdb.c1
-rw-r--r--arch/arm64/kernel/machine_kexec.c30
-rw-r--r--arch/arm64/kernel/mte.c18
-rw-r--r--arch/arm64/kernel/patching.c150
-rw-r--r--arch/arm64/kernel/perf_callchain.c2
-rw-r--r--arch/arm64/kernel/perf_event.c40
-rw-r--r--arch/arm64/kernel/probes/kprobes.c35
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.c1
-rw-r--r--arch/arm64/kernel/probes/uprobes.c2
-rw-r--r--arch/arm64/kernel/process.c101
-rw-r--r--arch/arm64/kernel/ptrace.c2
-rw-r--r--arch/arm64/kernel/sdei.c64
-rw-r--r--arch/arm64/kernel/setup.c8
-rw-r--r--arch/arm64/kernel/signal.c26
-rw-r--r--arch/arm64/kernel/smccc-call.S83
-rw-r--r--arch/arm64/kernel/smp.c17
-rw-r--r--arch/arm64/kernel/smp_spin_table.c7
-rw-r--r--arch/arm64/kernel/stacktrace.c18
-rw-r--r--arch/arm64/kernel/suspend.c12
-rw-r--r--arch/arm64/kernel/sys_compat.c2
-rw-r--r--arch/arm64/kernel/traps.c135
-rw-r--r--arch/arm64/kvm/Kconfig5
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/arch_timer.c162
-rw-r--r--arch/arm64/kvm/arm.c33
-rw-r--r--arch/arm64/kvm/guest.c134
-rw-r--r--arch/arm64/kvm/hyp/entry.S7
-rw-r--r--arch/arm64/kvm/hyp/exception.c3
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h21
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/gfp.h45
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h2
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/memory.h7
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mm.h13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/cache.S4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c60
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c112
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c33
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c61
-rw-r--r--arch/arm64/kvm/hyp/reserved_mem.c3
-rw-r--r--arch/arm64/kvm/mmu.c196
-rw-r--r--arch/arm64/kvm/pmu-emul.c4
-rw-r--r--arch/arm64/kvm/reset.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c32
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c36
-rw-r--r--arch/arm64/kvm/vgic/vgic-v2.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic.c14
-rw-r--r--arch/arm64/lib/Makefile4
-rw-r--r--arch/arm64/lib/clear_user.S47
-rw-r--r--arch/arm64/lib/insn.c (renamed from arch/arm64/kernel/insn.c)249
-rw-r--r--arch/arm64/lib/kasan_sw_tags.S76
-rw-r--r--arch/arm64/lib/memchr.S65
-rw-r--r--arch/arm64/lib/memcmp.S346
-rw-r--r--arch/arm64/lib/memcpy.S272
-rw-r--r--arch/arm64/lib/memmove.S189
-rw-r--r--arch/arm64/lib/mte.S20
-rw-r--r--arch/arm64/lib/strcmp.S289
-rw-r--r--arch/arm64/lib/strlen.S258
-rw-r--r--arch/arm64/lib/strncmp.S406
-rw-r--r--arch/arm64/lib/uaccess_flushcache.c4
-rw-r--r--arch/arm64/mm/cache.S158
-rw-r--r--arch/arm64/mm/context.c6
-rw-r--r--arch/arm64/mm/fault.c50
-rw-r--r--arch/arm64/mm/flush.c29
-rw-r--r--arch/arm64/mm/init.c7
-rw-r--r--arch/arm64/mm/mmu.c13
-rw-r--r--arch/arm64/mm/proc.S24
-rw-r--r--arch/arm64/mm/ptdump.c4
-rw-r--r--arch/arm64/net/bpf_jit_comp.c20
-rw-r--r--arch/arm64/tools/cpucaps3
152 files changed, 5011 insertions, 6743 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 39b378ee56ce..e07e7de9ac49 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -94,6 +94,7 @@ config ARM64
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANTS_NO_INSTR
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
@@ -1470,12 +1471,6 @@ menu "ARMv8.3 architectural features"
config ARM64_PTR_AUTH
bool "Enable support for pointer authentication"
default y
- depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
- # Modern compilers insert a .note.gnu.property section note for PAC
- # which is only understood by binutils starting with version 2.33.1.
- depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
- depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
- depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
help
Pointer authentication (part of the ARMv8.3 Extensions) provides
instructions for signing and authenticating pointers against secret
@@ -1487,13 +1482,6 @@ config ARM64_PTR_AUTH
for each process at exec() time, with these keys being
context-switched along with the process.
- If the compiler supports the -mbranch-protection or
- -msign-return-address flag (e.g. GCC 7 or later), then this option
- will also cause the kernel itself to be compiled with return address
- protection. In this case, and if the target hardware is known to
- support pointer authentication, then CONFIG_STACKPROTECTOR can be
- disabled with minimal loss of protection.
-
The feature is detected at runtime. If the feature is not present in
hardware it will not be advertised to userspace/KVM guest nor will it
be enabled.
@@ -1504,6 +1492,24 @@ config ARM64_PTR_AUTH
but with the feature disabled. On such a system, this option should
not be selected.
+config ARM64_PTR_AUTH_KERNEL
+ bool "Use pointer authentication for kernel"
+ default y
+ depends on ARM64_PTR_AUTH
+ depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
+ # Modern compilers insert a .note.gnu.property section note for PAC
+ # which is only understood by binutils starting with version 2.33.1.
+ depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
+ depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
+ depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
+ help
+ If the compiler supports the -mbranch-protection or
+ -msign-return-address flag (e.g. GCC 7 or later), then this option
+ will cause the kernel itself to be compiled with return address
+ protection. In this case, and if the target hardware is known to
+ support pointer authentication, then CONFIG_STACKPROTECTOR can be
+ disabled with minimal loss of protection.
+
This feature works with FUNCTION_GRAPH_TRACER option only if
DYNAMIC_FTRACE_WITH_REGS is enabled.
@@ -1595,7 +1601,7 @@ config ARM64_BTI_KERNEL
bool "Use Branch Target Identification for kernel"
default y
depends on ARM64_BTI
- depends on ARM64_PTR_AUTH
+ depends on ARM64_PTR_AUTH_KERNEL
depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
depends on !CC_IS_GCC || GCC_VERSION >= 100100
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b52481f0605d..3b5b1c480449 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -70,7 +70,7 @@ endif
# off, this will be overridden if we are using branch protection.
branch-prot-flags-y += $(call cc-option,-mbranch-protection=none)
-ifeq ($(CONFIG_ARM64_PTR_AUTH),y)
+ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all
# We enable additional protection for leaf functions as there is some
# narrow potential for ROP protection benefits and no substantial
diff --git a/arch/arm64/boot/dts/microchip/sparx5.dtsi b/arch/arm64/boot/dts/microchip/sparx5.dtsi
index d64621d1213b..ad07fff40544 100644
--- a/arch/arm64/boot/dts/microchip/sparx5.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5.dtsi
@@ -135,9 +135,12 @@
};
};
- reset@611010008 {
- compatible = "microchip,sparx5-chip-reset";
+ reset: reset-controller@611010008 {
+ compatible = "microchip,sparx5-switch-reset";
reg = <0x6 0x11010008 0x4>;
+ reg-names = "gcb";
+ #reset-cells = <1>;
+ cpu-syscon = <&cpu_ctrl>;
};
uart0: serial@600100000 {
@@ -275,6 +278,21 @@
"GPIO_46", "GPIO_47";
function = "emmc";
};
+
+ miim1_pins: miim1-pins {
+ pins = "GPIO_56", "GPIO_57";
+ function = "miim";
+ };
+
+ miim2_pins: miim2-pins {
+ pins = "GPIO_58", "GPIO_59";
+ function = "miim";
+ };
+
+ miim3_pins: miim3-pins {
+ pins = "GPIO_52", "GPIO_53";
+ function = "miim";
+ };
};
sgpio0: gpio@61101036c {
@@ -285,6 +303,8 @@
clocks = <&sys_clk>;
pinctrl-0 = <&sgpio0_pins>;
pinctrl-names = "default";
+ resets = <&reset 0>;
+ reset-names = "switch";
reg = <0x6 0x1101036c 0x100>;
sgpio_in0: gpio@0 {
compatible = "microchip,sparx5-sgpio-bank";
@@ -292,6 +312,9 @@
gpio-controller;
#gpio-cells = <3>;
ngpios = <96>;
+ interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
};
sgpio_out0: gpio@1 {
compatible = "microchip,sparx5-sgpio-bank";
@@ -310,6 +333,8 @@
clocks = <&sys_clk>;
pinctrl-0 = <&sgpio1_pins>;
pinctrl-names = "default";
+ resets = <&reset 0>;
+ reset-names = "switch";
reg = <0x6 0x11010484 0x100>;
sgpio_in1: gpio@0 {
compatible = "microchip,sparx5-sgpio-bank";
@@ -317,6 +342,9 @@
gpio-controller;
#gpio-cells = <3>;
ngpios = <96>;
+ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
};
sgpio_out1: gpio@1 {
compatible = "microchip,sparx5-sgpio-bank";
@@ -335,6 +363,8 @@
clocks = <&sys_clk>;
pinctrl-0 = <&sgpio2_pins>;
pinctrl-names = "default";
+ resets = <&reset 0>;
+ reset-names = "switch";
reg = <0x6 0x1101059c 0x100>;
sgpio_in2: gpio@0 {
reg = <0>;
@@ -342,6 +372,9 @@
gpio-controller;
#gpio-cells = <3>;
ngpios = <96>;
+ interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
};
sgpio_out2: gpio@1 {
compatible = "microchip,sparx5-sgpio-bank";
@@ -386,5 +419,62 @@
#thermal-sensor-cells = <0>;
clocks = <&ahb_clk>;
};
+
+ mdio0: mdio@6110102b0 {
+ compatible = "mscc,ocelot-miim";
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x6 0x110102b0 0x24>;
+ };
+
+ mdio1: mdio@6110102d4 {
+ compatible = "mscc,ocelot-miim";
+ status = "disabled";
+ pinctrl-0 = <&miim1_pins>;
+ pinctrl-names = "default";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x6 0x110102d4 0x24>;
+ };
+
+ mdio2: mdio@6110102f8 {
+ compatible = "mscc,ocelot-miim";
+ status = "disabled";
+ pinctrl-0 = <&miim2_pins>;
+ pinctrl-names = "default";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x6 0x110102d4 0x24>;
+ };
+
+ mdio3: mdio@61101031c {
+ compatible = "mscc,ocelot-miim";
+ status = "disabled";
+ pinctrl-0 = <&miim3_pins>;
+ pinctrl-names = "default";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x6 0x1101031c 0x24>;
+ };
+
+ serdes: serdes@10808000 {
+ compatible = "microchip,sparx5-serdes";
+ #phy-cells = <1>;
+ clocks = <&sys_clk>;
+ reg = <0x6 0x10808000 0x5d0000>;
+ };
+
+ switch: switch@0x600000000 {
+ compatible = "microchip,sparx5-switch";
+ reg = <0x6 0 0x401000>,
+ <0x6 0x10004000 0x7fc000>,
+ <0x6 0x11010000 0xaf0000>;
+ reg-names = "cpu", "dev", "gcb";
+ interrupt-names = "xtr";
+ interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+ resets = <&reset 0>;
+ reset-names = "switch";
+ };
};
};
diff --git a/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi b/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
index f0c915160990..33faf1f3264f 100644
--- a/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
@@ -7,30 +7,6 @@
#include "sparx5_pcb_common.dtsi"
/{
- aliases {
- i2c0 = &i2c0;
- i2c100 = &i2c100;
- i2c101 = &i2c101;
- i2c102 = &i2c102;
- i2c103 = &i2c103;
- i2c104 = &i2c104;
- i2c105 = &i2c105;
- i2c106 = &i2c106;
- i2c107 = &i2c107;
- i2c108 = &i2c108;
- i2c109 = &i2c109;
- i2c110 = &i2c110;
- i2c111 = &i2c111;
- i2c112 = &i2c112;
- i2c113 = &i2c113;
- i2c114 = &i2c114;
- i2c115 = &i2c115;
- i2c116 = &i2c116;
- i2c117 = &i2c117;
- i2c118 = &i2c118;
- i2c119 = &i2c119;
- };
-
gpio-restart {
compatible = "gpio-restart";
gpios = <&gpio 37 GPIO_ACTIVE_LOW>;
@@ -298,17 +274,10 @@
&spi0 {
status = "okay";
- spi@0 {
- compatible = "spi-mux";
- mux-controls = <&mux>;
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0>; /* CS0 */
- spi-flash@9 {
- compatible = "jedec,spi-nor";
- spi-max-frequency = <8000000>;
- reg = <0x9>; /* SPI */
- };
+ spi-flash@0 {
+ compatible = "jedec,spi-nor";
+ spi-max-frequency = <8000000>;
+ reg = <0>;
};
};
@@ -328,6 +297,33 @@
};
};
+&sgpio0 {
+ status = "okay";
+ microchip,sgpio-port-ranges = <8 15>;
+ gpio@0 {
+ ngpios = <64>;
+ };
+ gpio@1 {
+ ngpios = <64>;
+ };
+};
+
+&sgpio1 {
+ status = "okay";
+ microchip,sgpio-port-ranges = <24 31>;
+ gpio@0 {
+ ngpios = <64>;
+ };
+ gpio@1 {
+ ngpios = <64>;
+ };
+};
+
+&sgpio2 {
+ status = "okay";
+ microchip,sgpio-port-ranges = <0 0>, <11 31>;
+};
+
&gpio {
i2cmux_pins_i: i2cmux-pins-i {
pins = "GPIO_16", "GPIO_17", "GPIO_18", "GPIO_19",
@@ -415,9 +411,9 @@
&i2c0_imux {
pinctrl-names =
- "i2c100", "i2c101", "i2c102", "i2c103",
- "i2c104", "i2c105", "i2c106", "i2c107",
- "i2c108", "i2c109", "i2c110", "i2c111", "idle";
+ "i2c_sfp1", "i2c_sfp2", "i2c_sfp3", "i2c_sfp4",
+ "i2c_sfp5", "i2c_sfp6", "i2c_sfp7", "i2c_sfp8",
+ "i2c_sfp9", "i2c_sfp10", "i2c_sfp11", "i2c_sfp12", "idle";
pinctrl-0 = <&i2cmux_0>;
pinctrl-1 = <&i2cmux_1>;
pinctrl-2 = <&i2cmux_2>;
@@ -431,62 +427,62 @@
pinctrl-10 = <&i2cmux_10>;
pinctrl-11 = <&i2cmux_11>;
pinctrl-12 = <&i2cmux_pins_i>;
- i2c100: i2c_sfp1 {
+ i2c_sfp1: i2c_sfp1 {
reg = <0x0>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c101: i2c_sfp2 {
+ i2c_sfp2: i2c_sfp2 {
reg = <0x1>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c102: i2c_sfp3 {
+ i2c_sfp3: i2c_sfp3 {
reg = <0x2>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c103: i2c_sfp4 {
+ i2c_sfp4: i2c_sfp4 {
reg = <0x3>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c104: i2c_sfp5 {
+ i2c_sfp5: i2c_sfp5 {
reg = <0x4>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c105: i2c_sfp6 {
+ i2c_sfp6: i2c_sfp6 {
reg = <0x5>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c106: i2c_sfp7 {
+ i2c_sfp7: i2c_sfp7 {
reg = <0x6>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c107: i2c_sfp8 {
+ i2c_sfp8: i2c_sfp8 {
reg = <0x7>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c108: i2c_sfp9 {
+ i2c_sfp9: i2c_sfp9 {
reg = <0x8>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c109: i2c_sfp10 {
+ i2c_sfp10: i2c_sfp10 {
reg = <0x9>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c110: i2c_sfp11 {
+ i2c_sfp11: i2c_sfp11 {
reg = <0xa>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c111: i2c_sfp12 {
+ i2c_sfp12: i2c_sfp12 {
reg = <0xb>;
#address-cells = <1>;
#size-cells = <0>;
@@ -499,44 +495,413 @@
&gpio 61 GPIO_ACTIVE_HIGH
&gpio 54 GPIO_ACTIVE_HIGH>;
idle-state = <0x8>;
- i2c112: i2c_sfp13 {
+ i2c_sfp13: i2c_sfp13 {
reg = <0x0>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c113: i2c_sfp14 {
+ i2c_sfp14: i2c_sfp14 {
reg = <0x1>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c114: i2c_sfp15 {
+ i2c_sfp15: i2c_sfp15 {
reg = <0x2>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c115: i2c_sfp16 {
+ i2c_sfp16: i2c_sfp16 {
reg = <0x3>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c116: i2c_sfp17 {
+ i2c_sfp17: i2c_sfp17 {
reg = <0x4>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c117: i2c_sfp18 {
+ i2c_sfp18: i2c_sfp18 {
reg = <0x5>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c118: i2c_sfp19 {
+ i2c_sfp19: i2c_sfp19 {
reg = <0x6>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c119: i2c_sfp20 {
+ i2c_sfp20: i2c_sfp20 {
reg = <0x7>;
#address-cells = <1>;
#size-cells = <0>;
};
};
+
+&mdio3 {
+ status = "ok";
+ phy64: ethernet-phy@64 {
+ reg = <28>;
+ };
+};
+
+&axi {
+ sfp_eth12: sfp-eth12 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp1>;
+ tx-disable-gpios = <&sgpio_out2 11 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 11 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 11 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 12 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth13: sfp-eth13 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp2>;
+ tx-disable-gpios = <&sgpio_out2 12 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 12 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 12 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 13 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth14: sfp-eth14 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp3>;
+ tx-disable-gpios = <&sgpio_out2 13 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 13 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 13 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 14 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth15: sfp-eth15 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp4>;
+ tx-disable-gpios = <&sgpio_out2 14 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 14 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 14 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 15 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth48: sfp-eth48 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp5>;
+ tx-disable-gpios = <&sgpio_out2 15 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 15 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 15 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 16 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth49: sfp-eth49 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp6>;
+ tx-disable-gpios = <&sgpio_out2 16 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 16 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 16 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 17 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth50: sfp-eth50 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp7>;
+ tx-disable-gpios = <&sgpio_out2 17 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 17 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 17 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 18 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth51: sfp-eth51 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp8>;
+ tx-disable-gpios = <&sgpio_out2 18 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 18 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 18 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 19 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth52: sfp-eth52 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp9>;
+ tx-disable-gpios = <&sgpio_out2 19 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 19 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 19 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 20 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth53: sfp-eth53 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp10>;
+ tx-disable-gpios = <&sgpio_out2 20 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 20 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 20 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 21 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth54: sfp-eth54 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp11>;
+ tx-disable-gpios = <&sgpio_out2 21 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 21 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 21 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 22 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth55: sfp-eth55 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp12>;
+ tx-disable-gpios = <&sgpio_out2 22 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 22 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 22 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 23 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth56: sfp-eth56 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp13>;
+ tx-disable-gpios = <&sgpio_out2 23 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 23 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 23 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 24 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth57: sfp-eth57 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp14>;
+ tx-disable-gpios = <&sgpio_out2 24 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 24 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 24 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 25 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth58: sfp-eth58 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp15>;
+ tx-disable-gpios = <&sgpio_out2 25 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 25 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 25 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 26 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth59: sfp-eth59 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp16>;
+ tx-disable-gpios = <&sgpio_out2 26 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 26 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 26 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 27 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth60: sfp-eth60 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp17>;
+ tx-disable-gpios = <&sgpio_out2 27 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 27 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 27 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 28 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth61: sfp-eth61 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp18>;
+ tx-disable-gpios = <&sgpio_out2 28 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 28 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 28 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 29 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth62: sfp-eth62 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp19>;
+ tx-disable-gpios = <&sgpio_out2 29 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 29 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 29 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 30 0 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth63: sfp-eth63 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp20>;
+ tx-disable-gpios = <&sgpio_out2 30 1 GPIO_ACTIVE_LOW>;
+ los-gpios = <&sgpio_in2 30 1 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 30 2 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 31 0 GPIO_ACTIVE_HIGH>;
+ };
+};
+
+&switch {
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* 10G SFPs */
+ port12: port@12 {
+ reg = <12>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 13>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth12>;
+ microchip,sd-sgpio = <301>;
+ managed = "in-band-status";
+ };
+ port13: port@13 {
+ reg = <13>;
+ /* Example: CU SFP, 1G speed */
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 14>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth13>;
+ microchip,sd-sgpio = <305>;
+ managed = "in-band-status";
+ };
+ port14: port@14 {
+ reg = <14>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 15>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth14>;
+ microchip,sd-sgpio = <309>;
+ managed = "in-band-status";
+ };
+ port15: port@15 {
+ reg = <15>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 16>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth15>;
+ microchip,sd-sgpio = <313>;
+ managed = "in-band-status";
+ };
+ port48: port@48 {
+ reg = <48>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 17>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth48>;
+ microchip,sd-sgpio = <317>;
+ managed = "in-band-status";
+ };
+ port49: port@49 {
+ reg = <49>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 18>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth49>;
+ microchip,sd-sgpio = <321>;
+ managed = "in-band-status";
+ };
+ port50: port@50 {
+ reg = <50>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 19>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth50>;
+ microchip,sd-sgpio = <325>;
+ managed = "in-band-status";
+ };
+ port51: port@51 {
+ reg = <51>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 20>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth51>;
+ microchip,sd-sgpio = <329>;
+ managed = "in-band-status";
+ };
+ port52: port@52 {
+ reg = <52>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 21>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth52>;
+ microchip,sd-sgpio = <333>;
+ managed = "in-band-status";
+ };
+ port53: port@53 {
+ reg = <53>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 22>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth53>;
+ microchip,sd-sgpio = <337>;
+ managed = "in-band-status";
+ };
+ port54: port@54 {
+ reg = <54>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 23>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth54>;
+ microchip,sd-sgpio = <341>;
+ managed = "in-band-status";
+ };
+ port55: port@55 {
+ reg = <55>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 24>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth55>;
+ microchip,sd-sgpio = <345>;
+ managed = "in-band-status";
+ };
+ /* 25G SFPs */
+ port56: port@56 {
+ reg = <56>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 25>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth56>;
+ microchip,sd-sgpio = <349>;
+ managed = "in-band-status";
+ };
+ port57: port@57 {
+ reg = <57>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 26>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth57>;
+ microchip,sd-sgpio = <353>;
+ managed = "in-band-status";
+ };
+ port58: port@58 {
+ reg = <58>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 27>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth58>;
+ microchip,sd-sgpio = <357>;
+ managed = "in-band-status";
+ };
+ port59: port@59 {
+ reg = <59>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 28>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth59>;
+ microchip,sd-sgpio = <361>;
+ managed = "in-band-status";
+ };
+ port60: port@60 {
+ reg = <60>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 29>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth60>;
+ microchip,sd-sgpio = <365>;
+ managed = "in-band-status";
+ };
+ port61: port@61 {
+ reg = <61>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 30>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth61>;
+ microchip,sd-sgpio = <369>;
+ managed = "in-band-status";
+ };
+ port62: port@62 {
+ reg = <62>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 31>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth62>;
+ microchip,sd-sgpio = <373>;
+ managed = "in-band-status";
+ };
+ port63: port@63 {
+ reg = <63>;
+ microchip,bandwidth = <10000>;
+ phys = <&serdes 32>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth63>;
+ microchip,sd-sgpio = <377>;
+ managed = "in-band-status";
+ };
+ /* Finally the Management interface */
+ port64: port@64 {
+ reg = <64>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 0>;
+ phy-handle = <&phy64>;
+ phy-mode = "sgmii";
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi b/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
index e28c6dd16377..ef96e6d8c6b3 100644
--- a/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
@@ -7,14 +7,6 @@
#include "sparx5_pcb_common.dtsi"
/{
- aliases {
- i2c0 = &i2c0;
- i2c152 = &i2c152;
- i2c153 = &i2c153;
- i2c154 = &i2c154;
- i2c155 = &i2c155;
- };
-
gpio-restart {
compatible = "gpio-restart";
gpios = <&gpio 37 GPIO_ACTIVE_LOW>;
@@ -97,17 +89,10 @@
&spi0 {
status = "okay";
- spi@0 {
- compatible = "spi-mux";
- mux-controls = <&mux>;
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0>; /* CS0 */
- spi-flash@9 {
- compatible = "jedec,spi-nor";
- spi-max-frequency = <8000000>;
- reg = <0x9>; /* SPI */
- };
+ spi-flash@0 {
+ compatible = "jedec,spi-nor";
+ spi-max-frequency = <8000000>;
+ reg = <0>;
};
};
@@ -138,6 +123,11 @@
};
};
+&sgpio2 {
+ status = "okay";
+ microchip,sgpio-port-ranges = <0 0>, <16 18>, <28 31>;
+};
+
&axi {
i2c0_imux: i2c0-imux@0 {
compatible = "i2c-mux-pinctrl";
@@ -149,31 +139,614 @@
&i2c0_imux {
pinctrl-names =
- "i2c152", "i2c153", "i2c154", "i2c155",
+ "i2c_sfp1", "i2c_sfp2", "i2c_sfp3", "i2c_sfp4",
"idle";
pinctrl-0 = <&i2cmux_s29>;
pinctrl-1 = <&i2cmux_s30>;
pinctrl-2 = <&i2cmux_s31>;
pinctrl-3 = <&i2cmux_s32>;
pinctrl-4 = <&i2cmux_pins_i>;
- i2c152: i2c_sfp1 {
+ i2c_sfp1: i2c_sfp1 {
reg = <0x0>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c153: i2c_sfp2 {
+ i2c_sfp2: i2c_sfp2 {
reg = <0x1>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c154: i2c_sfp3 {
+ i2c_sfp3: i2c_sfp3 {
reg = <0x2>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c155: i2c_sfp4 {
+ i2c_sfp4: i2c_sfp4 {
reg = <0x3>;
#address-cells = <1>;
#size-cells = <0>;
};
};
+
+&axi {
+ sfp_eth60: sfp-eth60 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp1>;
+ tx-disable-gpios = <&sgpio_out2 28 0 GPIO_ACTIVE_LOW>;
+ rate-select0-gpios = <&sgpio_out2 28 1 GPIO_ACTIVE_HIGH>;
+ los-gpios = <&sgpio_in2 28 0 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 28 1 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 28 2 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth61: sfp-eth61 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp2>;
+ tx-disable-gpios = <&sgpio_out2 29 0 GPIO_ACTIVE_LOW>;
+ rate-select0-gpios = <&sgpio_out2 29 1 GPIO_ACTIVE_HIGH>;
+ los-gpios = <&sgpio_in2 29 0 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 29 1 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 29 2 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth62: sfp-eth62 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp3>;
+ tx-disable-gpios = <&sgpio_out2 30 0 GPIO_ACTIVE_LOW>;
+ rate-select0-gpios = <&sgpio_out2 30 1 GPIO_ACTIVE_HIGH>;
+ los-gpios = <&sgpio_in2 30 0 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 30 1 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 30 2 GPIO_ACTIVE_HIGH>;
+ };
+ sfp_eth63: sfp-eth63 {
+ compatible = "sff,sfp";
+ i2c-bus = <&i2c_sfp4>;
+ tx-disable-gpios = <&sgpio_out2 31 0 GPIO_ACTIVE_LOW>;
+ rate-select0-gpios = <&sgpio_out2 31 1 GPIO_ACTIVE_HIGH>;
+ los-gpios = <&sgpio_in2 31 0 GPIO_ACTIVE_HIGH>;
+ mod-def0-gpios = <&sgpio_in2 31 1 GPIO_ACTIVE_LOW>;
+ tx-fault-gpios = <&sgpio_in2 31 2 GPIO_ACTIVE_HIGH>;
+ };
+};
+
+&mdio0 {
+ status = "ok";
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+ phy1: ethernet-phy@1 {
+ reg = <1>;
+ };
+ phy2: ethernet-phy@2 {
+ reg = <2>;
+ };
+ phy3: ethernet-phy@3 {
+ reg = <3>;
+ };
+ phy4: ethernet-phy@4 {
+ reg = <4>;
+ };
+ phy5: ethernet-phy@5 {
+ reg = <5>;
+ };
+ phy6: ethernet-phy@6 {
+ reg = <6>;
+ };
+ phy7: ethernet-phy@7 {
+ reg = <7>;
+ };
+ phy8: ethernet-phy@8 {
+ reg = <8>;
+ };
+ phy9: ethernet-phy@9 {
+ reg = <9>;
+ };
+ phy10: ethernet-phy@10 {
+ reg = <10>;
+ };
+ phy11: ethernet-phy@11 {
+ reg = <11>;
+ };
+ phy12: ethernet-phy@12 {
+ reg = <12>;
+ };
+ phy13: ethernet-phy@13 {
+ reg = <13>;
+ };
+ phy14: ethernet-phy@14 {
+ reg = <14>;
+ };
+ phy15: ethernet-phy@15 {
+ reg = <15>;
+ };
+ phy16: ethernet-phy@16 {
+ reg = <16>;
+ };
+ phy17: ethernet-phy@17 {
+ reg = <17>;
+ };
+ phy18: ethernet-phy@18 {
+ reg = <18>;
+ };
+ phy19: ethernet-phy@19 {
+ reg = <19>;
+ };
+ phy20: ethernet-phy@20 {
+ reg = <20>;
+ };
+ phy21: ethernet-phy@21 {
+ reg = <21>;
+ };
+ phy22: ethernet-phy@22 {
+ reg = <22>;
+ };
+ phy23: ethernet-phy@23 {
+ reg = <23>;
+ };
+};
+
+&mdio1 {
+ status = "ok";
+ phy24: ethernet-phy@24 {
+ reg = <0>;
+ };
+ phy25: ethernet-phy@25 {
+ reg = <1>;
+ };
+ phy26: ethernet-phy@26 {
+ reg = <2>;
+ };
+ phy27: ethernet-phy@27 {
+ reg = <3>;
+ };
+ phy28: ethernet-phy@28 {
+ reg = <4>;
+ };
+ phy29: ethernet-phy@29 {
+ reg = <5>;
+ };
+ phy30: ethernet-phy@30 {
+ reg = <6>;
+ };
+ phy31: ethernet-phy@31 {
+ reg = <7>;
+ };
+ phy32: ethernet-phy@32 {
+ reg = <8>;
+ };
+ phy33: ethernet-phy@33 {
+ reg = <9>;
+ };
+ phy34: ethernet-phy@34 {
+ reg = <10>;
+ };
+ phy35: ethernet-phy@35 {
+ reg = <11>;
+ };
+ phy36: ethernet-phy@36 {
+ reg = <12>;
+ };
+ phy37: ethernet-phy@37 {
+ reg = <13>;
+ };
+ phy38: ethernet-phy@38 {
+ reg = <14>;
+ };
+ phy39: ethernet-phy@39 {
+ reg = <15>;
+ };
+ phy40: ethernet-phy@40 {
+ reg = <16>;
+ };
+ phy41: ethernet-phy@41 {
+ reg = <17>;
+ };
+ phy42: ethernet-phy@42 {
+ reg = <18>;
+ };
+ phy43: ethernet-phy@43 {
+ reg = <19>;
+ };
+ phy44: ethernet-phy@44 {
+ reg = <20>;
+ };
+ phy45: ethernet-phy@45 {
+ reg = <21>;
+ };
+ phy46: ethernet-phy@46 {
+ reg = <22>;
+ };
+ phy47: ethernet-phy@47 {
+ reg = <23>;
+ };
+};
+
+&mdio3 {
+ status = "ok";
+ phy64: ethernet-phy@64 {
+ reg = <28>;
+ };
+};
+
+&switch {
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port0: port@0 {
+ reg = <0>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 13>;
+ phy-handle = <&phy0>;
+ phy-mode = "qsgmii";
+ };
+ port1: port@1 {
+ reg = <1>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 13>;
+ phy-handle = <&phy1>;
+ phy-mode = "qsgmii";
+ };
+ port2: port@2 {
+ reg = <2>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 13>;
+ phy-handle = <&phy2>;
+ phy-mode = "qsgmii";
+ };
+ port3: port@3 {
+ reg = <3>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 13>;
+ phy-handle = <&phy3>;
+ phy-mode = "qsgmii";
+ };
+ port4: port@4 {
+ reg = <4>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 14>;
+ phy-handle = <&phy4>;
+ phy-mode = "qsgmii";
+ };
+ port5: port@5 {
+ reg = <5>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 14>;
+ phy-handle = <&phy5>;
+ phy-mode = "qsgmii";
+ };
+ port6: port@6 {
+ reg = <6>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 14>;
+ phy-handle = <&phy6>;
+ phy-mode = "qsgmii";
+ };
+ port7: port@7 {
+ reg = <7>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 14>;
+ phy-handle = <&phy7>;
+ phy-mode = "qsgmii";
+ };
+ port8: port@8 {
+ reg = <8>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 15>;
+ phy-handle = <&phy8>;
+ phy-mode = "qsgmii";
+ };
+ port9: port@9 {
+ reg = <9>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 15>;
+ phy-handle = <&phy9>;
+ phy-mode = "qsgmii";
+ };
+ port10: port@10 {
+ reg = <10>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 15>;
+ phy-handle = <&phy10>;
+ phy-mode = "qsgmii";
+ };
+ port11: port@11 {
+ reg = <11>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 15>;
+ phy-handle = <&phy11>;
+ phy-mode = "qsgmii";
+ };
+ port12: port@12 {
+ reg = <12>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 16>;
+ phy-handle = <&phy12>;
+ phy-mode = "qsgmii";
+ };
+ port13: port@13 {
+ reg = <13>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 16>;
+ phy-handle = <&phy13>;
+ phy-mode = "qsgmii";
+ };
+ port14: port@14 {
+ reg = <14>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 16>;
+ phy-handle = <&phy14>;
+ phy-mode = "qsgmii";
+ };
+ port15: port@15 {
+ reg = <15>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 16>;
+ phy-handle = <&phy15>;
+ phy-mode = "qsgmii";
+ };
+ port16: port@16 {
+ reg = <16>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 17>;
+ phy-handle = <&phy16>;
+ phy-mode = "qsgmii";
+ };
+ port17: port@17 {
+ reg = <17>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 17>;
+ phy-handle = <&phy17>;
+ phy-mode = "qsgmii";
+ };
+ port18: port@18 {
+ reg = <18>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 17>;
+ phy-handle = <&phy18>;
+ phy-mode = "qsgmii";
+ };
+ port19: port@19 {
+ reg = <19>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 17>;
+ phy-handle = <&phy19>;
+ phy-mode = "qsgmii";
+ };
+ port20: port@20 {
+ reg = <20>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 18>;
+ phy-handle = <&phy20>;
+ phy-mode = "qsgmii";
+ };
+ port21: port@21 {
+ reg = <21>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 18>;
+ phy-handle = <&phy21>;
+ phy-mode = "qsgmii";
+ };
+ port22: port@22 {
+ reg = <22>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 18>;
+ phy-handle = <&phy22>;
+ phy-mode = "qsgmii";
+ };
+ port23: port@23 {
+ reg = <23>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 18>;
+ phy-handle = <&phy23>;
+ phy-mode = "qsgmii";
+ };
+ port24: port@24 {
+ reg = <24>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 19>;
+ phy-handle = <&phy24>;
+ phy-mode = "qsgmii";
+ };
+ port25: port@25 {
+ reg = <25>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 19>;
+ phy-handle = <&phy25>;
+ phy-mode = "qsgmii";
+ };
+ port26: port@26 {
+ reg = <26>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 19>;
+ phy-handle = <&phy26>;
+ phy-mode = "qsgmii";
+ };
+ port27: port@27 {
+ reg = <27>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 19>;
+ phy-handle = <&phy27>;
+ phy-mode = "qsgmii";
+ };
+ port28: port@28 {
+ reg = <28>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 20>;
+ phy-handle = <&phy28>;
+ phy-mode = "qsgmii";
+ };
+ port29: port@29 {
+ reg = <29>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 20>;
+ phy-handle = <&phy29>;
+ phy-mode = "qsgmii";
+ };
+ port30: port@30 {
+ reg = <30>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 20>;
+ phy-handle = <&phy30>;
+ phy-mode = "qsgmii";
+ };
+ port31: port@31 {
+ reg = <31>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 20>;
+ phy-handle = <&phy31>;
+ phy-mode = "qsgmii";
+ };
+ port32: port@32 {
+ reg = <32>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 21>;
+ phy-handle = <&phy32>;
+ phy-mode = "qsgmii";
+ };
+ port33: port@33 {
+ reg = <33>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 21>;
+ phy-handle = <&phy33>;
+ phy-mode = "qsgmii";
+ };
+ port34: port@34 {
+ reg = <34>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 21>;
+ phy-handle = <&phy34>;
+ phy-mode = "qsgmii";
+ };
+ port35: port@35 {
+ reg = <35>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 21>;
+ phy-handle = <&phy35>;
+ phy-mode = "qsgmii";
+ };
+ port36: port@36 {
+ reg = <36>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 22>;
+ phy-handle = <&phy36>;
+ phy-mode = "qsgmii";
+ };
+ port37: port@37 {
+ reg = <37>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 22>;
+ phy-handle = <&phy37>;
+ phy-mode = "qsgmii";
+ };
+ port38: port@38 {
+ reg = <38>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 22>;
+ phy-handle = <&phy38>;
+ phy-mode = "qsgmii";
+ };
+ port39: port@39 {
+ reg = <39>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 22>;
+ phy-handle = <&phy39>;
+ phy-mode = "qsgmii";
+ };
+ port40: port@40 {
+ reg = <40>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 23>;
+ phy-handle = <&phy40>;
+ phy-mode = "qsgmii";
+ };
+ port41: port@41 {
+ reg = <41>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 23>;
+ phy-handle = <&phy41>;
+ phy-mode = "qsgmii";
+ };
+ port42: port@42 {
+ reg = <42>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 23>;
+ phy-handle = <&phy42>;
+ phy-mode = "qsgmii";
+ };
+ port43: port@43 {
+ reg = <43>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 23>;
+ phy-handle = <&phy43>;
+ phy-mode = "qsgmii";
+ };
+ port44: port@44 {
+ reg = <44>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 24>;
+ phy-handle = <&phy44>;
+ phy-mode = "qsgmii";
+ };
+ port45: port@45 {
+ reg = <45>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 24>;
+ phy-handle = <&phy45>;
+ phy-mode = "qsgmii";
+ };
+ port46: port@46 {
+ reg = <46>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 24>;
+ phy-handle = <&phy46>;
+ phy-mode = "qsgmii";
+ };
+ port47: port@47 {
+ reg = <47>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 24>;
+ phy-handle = <&phy47>;
+ phy-mode = "qsgmii";
+ };
+ /* Then the 25G interfaces */
+ port60: port@60 {
+ reg = <60>;
+ microchip,bandwidth = <25000>;
+ phys = <&serdes 29>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth60>;
+ managed = "in-band-status";
+ };
+ port61: port@61 {
+ reg = <61>;
+ microchip,bandwidth = <25000>;
+ phys = <&serdes 30>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth61>;
+ managed = "in-band-status";
+ };
+ port62: port@62 {
+ reg = <62>;
+ microchip,bandwidth = <25000>;
+ phys = <&serdes 31>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth62>;
+ managed = "in-band-status";
+ };
+ port63: port@63 {
+ reg = <63>;
+ microchip,bandwidth = <25000>;
+ phys = <&serdes 32>;
+ phy-mode = "10gbase-r";
+ sfp = <&sfp_eth63>;
+ managed = "in-band-status";
+ };
+ /* Finally the Management interface */
+ port64: port@64 {
+ reg = <64>;
+ microchip,bandwidth = <1000>;
+ phys = <&serdes 0>;
+ phy-handle = <&phy64>;
+ phy-mode = "sgmii";
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
index 0c5fa9801e6f..b815ce73e5c6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
@@ -637,6 +637,28 @@
status = "disabled";
};
+ gmac: ethernet@ff4e0000 {
+ compatible = "rockchip,rk3308-gmac";
+ reg = <0x0 0xff4e0000 0x0 0x10000>;
+ interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ clocks = <&cru SCLK_MAC>, <&cru SCLK_MAC_RX_TX>,
+ <&cru SCLK_MAC_RX_TX>, <&cru SCLK_MAC_REF>,
+ <&cru SCLK_MAC>, <&cru ACLK_MAC>,
+ <&cru PCLK_MAC>, <&cru SCLK_MAC_RMII>;
+ clock-names = "stmmaceth", "mac_clk_rx",
+ "mac_clk_tx", "clk_mac_ref",
+ "clk_mac_refout", "aclk_mac",
+ "pclk_mac", "clk_mac_speed";
+ phy-mode = "rmii";
+ pinctrl-names = "default";
+ pinctrl-0 = <&rmii_pins &mac_refclk_12ma>;
+ resets = <&cru SRST_MAC_A>;
+ reset-names = "stmmaceth";
+ rockchip,grf = <&grf>;
+ status = "disabled";
+ };
+
cru: clock-controller@ff500000 {
compatible = "rockchip,rk3308-cru";
reg = <0x0 0xff500000 0x0 0x1000>;
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index d0901e610df3..09a805cc32d7 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -68,19 +68,13 @@ CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
$(call if_changed_rule,cc_o_c)
-ifdef REGENERATE_ARM64_CRYPTO
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) void $(@)
-$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
+$(obj)/%-core.S: $(src)/%-armv8.pl
$(call cmd,perlasm)
-$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
+$(obj)/sha256-core.S: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
-$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
- $(call cmd,perlasm)
-
-endif
-
clean-files += poly1305-core.S sha256-core.S sha512-core.S
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
deleted file mode 100644
index fb2822abf63a..000000000000
--- a/arch/arm64/crypto/poly1305-core.S_shipped
+++ /dev/null
@@ -1,835 +0,0 @@
-#ifndef __KERNEL__
-# include "arm_arch.h"
-.extern OPENSSL_armcap_P
-#endif
-
-.text
-
-// forward "declarations" are required for Apple
-.globl poly1305_blocks
-.globl poly1305_emit
-
-.globl poly1305_init
-.type poly1305_init,%function
-.align 5
-poly1305_init:
- cmp x1,xzr
- stp xzr,xzr,[x0] // zero hash value
- stp xzr,xzr,[x0,#16] // [along with is_base2_26]
-
- csel x0,xzr,x0,eq
- b.eq .Lno_key
-
-#ifndef __KERNEL__
- adrp x17,OPENSSL_armcap_P
- ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
-#endif
-
- ldp x7,x8,[x1] // load key
- mov x9,#0xfffffffc0fffffff
- movk x9,#0x0fff,lsl#48
-#ifdef __AARCH64EB__
- rev x7,x7 // flip bytes
- rev x8,x8
-#endif
- and x7,x7,x9 // &=0ffffffc0fffffff
- and x9,x9,#-4
- and x8,x8,x9 // &=0ffffffc0ffffffc
- mov w9,#-1
- stp x7,x8,[x0,#32] // save key value
- str w9,[x0,#48] // impossible key power value
-
-#ifndef __KERNEL__
- tst w17,#ARMV7_NEON
-
- adr x12,.Lpoly1305_blocks
- adr x7,.Lpoly1305_blocks_neon
- adr x13,.Lpoly1305_emit
-
- csel x12,x12,x7,eq
-
-# ifdef __ILP32__
- stp w12,w13,[x2]
-# else
- stp x12,x13,[x2]
-# endif
-#endif
- mov x0,#1
-.Lno_key:
- ret
-.size poly1305_init,.-poly1305_init
-
-.type poly1305_blocks,%function
-.align 5
-poly1305_blocks:
-.Lpoly1305_blocks:
- ands x2,x2,#-16
- b.eq .Lno_data
-
- ldp x4,x5,[x0] // load hash value
- ldp x6,x17,[x0,#16] // [along with is_base2_26]
- ldp x7,x8,[x0,#32] // load key value
-
-#ifdef __AARCH64EB__
- lsr x12,x4,#32
- mov w13,w4
- lsr x14,x5,#32
- mov w15,w5
- lsr x16,x6,#32
-#else
- mov w12,w4
- lsr x13,x4,#32
- mov w14,w5
- lsr x15,x5,#32
- mov w16,w6
-#endif
-
- add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
- lsr x13,x14,#12
- adds x12,x12,x14,lsl#52
- add x13,x13,x15,lsl#14
- adc x13,x13,xzr
- lsr x14,x16,#24
- adds x13,x13,x16,lsl#40
- adc x14,x14,xzr
-
- cmp x17,#0 // is_base2_26?
- add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
- csel x4,x4,x12,eq // choose between radixes
- csel x5,x5,x13,eq
- csel x6,x6,x14,eq
-
-.Loop:
- ldp x10,x11,[x1],#16 // load input
- sub x2,x2,#16
-#ifdef __AARCH64EB__
- rev x10,x10
- rev x11,x11
-#endif
- adds x4,x4,x10 // accumulate input
- adcs x5,x5,x11
-
- mul x12,x4,x7 // h0*r0
- adc x6,x6,x3
- umulh x13,x4,x7
-
- mul x10,x5,x9 // h1*5*r1
- umulh x11,x5,x9
-
- adds x12,x12,x10
- mul x10,x4,x8 // h0*r1
- adc x13,x13,x11
- umulh x14,x4,x8
-
- adds x13,x13,x10
- mul x10,x5,x7 // h1*r0
- adc x14,x14,xzr
- umulh x11,x5,x7
-
- adds x13,x13,x10
- mul x10,x6,x9 // h2*5*r1
- adc x14,x14,x11
- mul x11,x6,x7 // h2*r0
-
- adds x13,x13,x10
- adc x14,x14,x11
-
- and x10,x14,#-4 // final reduction
- and x6,x14,#3
- add x10,x10,x14,lsr#2
- adds x4,x12,x10
- adcs x5,x13,xzr
- adc x6,x6,xzr
-
- cbnz x2,.Loop
-
- stp x4,x5,[x0] // store hash value
- stp x6,xzr,[x0,#16] // [and clear is_base2_26]
-
-.Lno_data:
- ret
-.size poly1305_blocks,.-poly1305_blocks
-
-.type poly1305_emit,%function
-.align 5
-poly1305_emit:
-.Lpoly1305_emit:
- ldp x4,x5,[x0] // load hash base 2^64
- ldp x6,x7,[x0,#16] // [along with is_base2_26]
- ldp x10,x11,[x2] // load nonce
-
-#ifdef __AARCH64EB__
- lsr x12,x4,#32
- mov w13,w4
- lsr x14,x5,#32
- mov w15,w5
- lsr x16,x6,#32
-#else
- mov w12,w4
- lsr x13,x4,#32
- mov w14,w5
- lsr x15,x5,#32
- mov w16,w6
-#endif
-
- add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
- lsr x13,x14,#12
- adds x12,x12,x14,lsl#52
- add x13,x13,x15,lsl#14
- adc x13,x13,xzr
- lsr x14,x16,#24
- adds x13,x13,x16,lsl#40
- adc x14,x14,xzr
-
- cmp x7,#0 // is_base2_26?
- csel x4,x4,x12,eq // choose between radixes
- csel x5,x5,x13,eq
- csel x6,x6,x14,eq
-
- adds x12,x4,#5 // compare to modulus
- adcs x13,x5,xzr
- adc x14,x6,xzr
-
- tst x14,#-4 // see if it's carried/borrowed
-
- csel x4,x4,x12,eq
- csel x5,x5,x13,eq
-
-#ifdef __AARCH64EB__
- ror x10,x10,#32 // flip nonce words
- ror x11,x11,#32
-#endif
- adds x4,x4,x10 // accumulate nonce
- adc x5,x5,x11
-#ifdef __AARCH64EB__
- rev x4,x4 // flip output bytes
- rev x5,x5
-#endif
- stp x4,x5,[x1] // write result
-
- ret
-.size poly1305_emit,.-poly1305_emit
-.type poly1305_mult,%function
-.align 5
-poly1305_mult:
- mul x12,x4,x7 // h0*r0
- umulh x13,x4,x7
-
- mul x10,x5,x9 // h1*5*r1
- umulh x11,x5,x9
-
- adds x12,x12,x10
- mul x10,x4,x8 // h0*r1
- adc x13,x13,x11
- umulh x14,x4,x8
-
- adds x13,x13,x10
- mul x10,x5,x7 // h1*r0
- adc x14,x14,xzr
- umulh x11,x5,x7
-
- adds x13,x13,x10
- mul x10,x6,x9 // h2*5*r1
- adc x14,x14,x11
- mul x11,x6,x7 // h2*r0
-
- adds x13,x13,x10
- adc x14,x14,x11
-
- and x10,x14,#-4 // final reduction
- and x6,x14,#3
- add x10,x10,x14,lsr#2
- adds x4,x12,x10
- adcs x5,x13,xzr
- adc x6,x6,xzr
-
- ret
-.size poly1305_mult,.-poly1305_mult
-
-.type poly1305_splat,%function
-.align 4
-poly1305_splat:
- and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
- ubfx x13,x4,#26,#26
- extr x14,x5,x4,#52
- and x14,x14,#0x03ffffff
- ubfx x15,x5,#14,#26
- extr x16,x6,x5,#40
-
- str w12,[x0,#16*0] // r0
- add w12,w13,w13,lsl#2 // r1*5
- str w13,[x0,#16*1] // r1
- add w13,w14,w14,lsl#2 // r2*5
- str w12,[x0,#16*2] // s1
- str w14,[x0,#16*3] // r2
- add w14,w15,w15,lsl#2 // r3*5
- str w13,[x0,#16*4] // s2
- str w15,[x0,#16*5] // r3
- add w15,w16,w16,lsl#2 // r4*5
- str w14,[x0,#16*6] // s3
- str w16,[x0,#16*7] // r4
- str w15,[x0,#16*8] // s4
-
- ret
-.size poly1305_splat,.-poly1305_splat
-
-#ifdef __KERNEL__
-.globl poly1305_blocks_neon
-#endif
-.type poly1305_blocks_neon,%function
-.align 5
-poly1305_blocks_neon:
-.Lpoly1305_blocks_neon:
- ldr x17,[x0,#24]
- cmp x2,#128
- b.lo .Lpoly1305_blocks
-
- .inst 0xd503233f // paciasp
- stp x29,x30,[sp,#-80]!
- add x29,sp,#0
-
- stp d8,d9,[sp,#16] // meet ABI requirements
- stp d10,d11,[sp,#32]
- stp d12,d13,[sp,#48]
- stp d14,d15,[sp,#64]
-
- cbz x17,.Lbase2_64_neon
-
- ldp w10,w11,[x0] // load hash value base 2^26
- ldp w12,w13,[x0,#8]
- ldr w14,[x0,#16]
-
- tst x2,#31
- b.eq .Leven_neon
-
- ldp x7,x8,[x0,#32] // load key value
-
- add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
- lsr x5,x12,#12
- adds x4,x4,x12,lsl#52
- add x5,x5,x13,lsl#14
- adc x5,x5,xzr
- lsr x6,x14,#24
- adds x5,x5,x14,lsl#40
- adc x14,x6,xzr // can be partially reduced...
-
- ldp x12,x13,[x1],#16 // load input
- sub x2,x2,#16
- add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
-
-#ifdef __AARCH64EB__
- rev x12,x12
- rev x13,x13
-#endif
- adds x4,x4,x12 // accumulate input
- adcs x5,x5,x13
- adc x6,x6,x3
-
- bl poly1305_mult
-
- and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
- ubfx x11,x4,#26,#26
- extr x12,x5,x4,#52
- and x12,x12,#0x03ffffff
- ubfx x13,x5,#14,#26
- extr x14,x6,x5,#40
-
- b .Leven_neon
-
-.align 4
-.Lbase2_64_neon:
- ldp x7,x8,[x0,#32] // load key value
-
- ldp x4,x5,[x0] // load hash value base 2^64
- ldr x6,[x0,#16]
-
- tst x2,#31
- b.eq .Linit_neon
-
- ldp x12,x13,[x1],#16 // load input
- sub x2,x2,#16
- add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
-#ifdef __AARCH64EB__
- rev x12,x12
- rev x13,x13
-#endif
- adds x4,x4,x12 // accumulate input
- adcs x5,x5,x13
- adc x6,x6,x3
-
- bl poly1305_mult
-
-.Linit_neon:
- ldr w17,[x0,#48] // first table element
- and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
- ubfx x11,x4,#26,#26
- extr x12,x5,x4,#52
- and x12,x12,#0x03ffffff
- ubfx x13,x5,#14,#26
- extr x14,x6,x5,#40
-
- cmp w17,#-1 // is value impossible?
- b.ne .Leven_neon
-
- fmov d24,x10
- fmov d25,x11
- fmov d26,x12
- fmov d27,x13
- fmov d28,x14
-
- ////////////////////////////////// initialize r^n table
- mov x4,x7 // r^1
- add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
- mov x5,x8
- mov x6,xzr
- add x0,x0,#48+12
- bl poly1305_splat
-
- bl poly1305_mult // r^2
- sub x0,x0,#4
- bl poly1305_splat
-
- bl poly1305_mult // r^3
- sub x0,x0,#4
- bl poly1305_splat
-
- bl poly1305_mult // r^4
- sub x0,x0,#4
- bl poly1305_splat
- sub x0,x0,#48 // restore original x0
- b .Ldo_neon
-
-.align 4
-.Leven_neon:
- fmov d24,x10
- fmov d25,x11
- fmov d26,x12
- fmov d27,x13
- fmov d28,x14
-
-.Ldo_neon:
- ldp x8,x12,[x1,#32] // inp[2:3]
- subs x2,x2,#64
- ldp x9,x13,[x1,#48]
- add x16,x1,#96
- adr x17,.Lzeros
-
- lsl x3,x3,#24
- add x15,x0,#48
-
-#ifdef __AARCH64EB__
- rev x8,x8
- rev x12,x12
- rev x9,x9
- rev x13,x13
-#endif
- and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
- and x5,x9,#0x03ffffff
- ubfx x6,x8,#26,#26
- ubfx x7,x9,#26,#26
- add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
- extr x8,x12,x8,#52
- extr x9,x13,x9,#52
- add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
- fmov d14,x4
- and x8,x8,#0x03ffffff
- and x9,x9,#0x03ffffff
- ubfx x10,x12,#14,#26
- ubfx x11,x13,#14,#26
- add x12,x3,x12,lsr#40
- add x13,x3,x13,lsr#40
- add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
- fmov d15,x6
- add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
- add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
- fmov d16,x8
- fmov d17,x10
- fmov d18,x12
-
- ldp x8,x12,[x1],#16 // inp[0:1]
- ldp x9,x13,[x1],#48
-
- ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
- ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
- ld1 {v8.4s},[x15]
-
-#ifdef __AARCH64EB__
- rev x8,x8
- rev x12,x12
- rev x9,x9
- rev x13,x13
-#endif
- and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
- and x5,x9,#0x03ffffff
- ubfx x6,x8,#26,#26
- ubfx x7,x9,#26,#26
- add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
- extr x8,x12,x8,#52
- extr x9,x13,x9,#52
- add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
- fmov d9,x4
- and x8,x8,#0x03ffffff
- and x9,x9,#0x03ffffff
- ubfx x10,x12,#14,#26
- ubfx x11,x13,#14,#26
- add x12,x3,x12,lsr#40
- add x13,x3,x13,lsr#40
- add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
- fmov d10,x6
- add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
- add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
- movi v31.2d,#-1
- fmov d11,x8
- fmov d12,x10
- fmov d13,x12
- ushr v31.2d,v31.2d,#38
-
- b.ls .Lskip_loop
-
-.align 4
-.Loop_neon:
- ////////////////////////////////////////////////////////////////
- // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
- // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
- // ___________________/
- // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
- // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
- // ___________________/ ____________________/
- //
- // Note that we start with inp[2:3]*r^2. This is because it
- // doesn't depend on reduction in previous iteration.
- ////////////////////////////////////////////////////////////////
- // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
- // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
- // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
- // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
- // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
-
- subs x2,x2,#64
- umull v23.2d,v14.2s,v7.s[2]
- csel x16,x17,x16,lo
- umull v22.2d,v14.2s,v5.s[2]
- umull v21.2d,v14.2s,v3.s[2]
- ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
- umull v20.2d,v14.2s,v1.s[2]
- ldp x9,x13,[x16],#48
- umull v19.2d,v14.2s,v0.s[2]
-#ifdef __AARCH64EB__
- rev x8,x8
- rev x12,x12
- rev x9,x9
- rev x13,x13
-#endif
-
- umlal v23.2d,v15.2s,v5.s[2]
- and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
- umlal v22.2d,v15.2s,v3.s[2]
- and x5,x9,#0x03ffffff
- umlal v21.2d,v15.2s,v1.s[2]
- ubfx x6,x8,#26,#26
- umlal v20.2d,v15.2s,v0.s[2]
- ubfx x7,x9,#26,#26
- umlal v19.2d,v15.2s,v8.s[2]
- add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
-
- umlal v23.2d,v16.2s,v3.s[2]
- extr x8,x12,x8,#52
- umlal v22.2d,v16.2s,v1.s[2]
- extr x9,x13,x9,#52
- umlal v21.2d,v16.2s,v0.s[2]
- add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
- umlal v20.2d,v16.2s,v8.s[2]
- fmov d14,x4
- umlal v19.2d,v16.2s,v6.s[2]
- and x8,x8,#0x03ffffff
-
- umlal v23.2d,v17.2s,v1.s[2]
- and x9,x9,#0x03ffffff
- umlal v22.2d,v17.2s,v0.s[2]
- ubfx x10,x12,#14,#26
- umlal v21.2d,v17.2s,v8.s[2]
- ubfx x11,x13,#14,#26
- umlal v20.2d,v17.2s,v6.s[2]
- add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
- umlal v19.2d,v17.2s,v4.s[2]
- fmov d15,x6
-
- add v11.2s,v11.2s,v26.2s
- add x12,x3,x12,lsr#40
- umlal v23.2d,v18.2s,v0.s[2]
- add x13,x3,x13,lsr#40
- umlal v22.2d,v18.2s,v8.s[2]
- add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
- umlal v21.2d,v18.2s,v6.s[2]
- add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
- umlal v20.2d,v18.2s,v4.s[2]
- fmov d16,x8
- umlal v19.2d,v18.2s,v2.s[2]
- fmov d17,x10
-
- ////////////////////////////////////////////////////////////////
- // (hash+inp[0:1])*r^4 and accumulate
-
- add v9.2s,v9.2s,v24.2s
- fmov d18,x12
- umlal v22.2d,v11.2s,v1.s[0]
- ldp x8,x12,[x1],#16 // inp[0:1]
- umlal v19.2d,v11.2s,v6.s[0]
- ldp x9,x13,[x1],#48
- umlal v23.2d,v11.2s,v3.s[0]
- umlal v20.2d,v11.2s,v8.s[0]
- umlal v21.2d,v11.2s,v0.s[0]
-#ifdef __AARCH64EB__
- rev x8,x8
- rev x12,x12
- rev x9,x9
- rev x13,x13
-#endif
-
- add v10.2s,v10.2s,v25.2s
- umlal v22.2d,v9.2s,v5.s[0]
- umlal v23.2d,v9.2s,v7.s[0]
- and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
- umlal v21.2d,v9.2s,v3.s[0]
- and x5,x9,#0x03ffffff
- umlal v19.2d,v9.2s,v0.s[0]
- ubfx x6,x8,#26,#26
- umlal v20.2d,v9.2s,v1.s[0]
- ubfx x7,x9,#26,#26
-
- add v12.2s,v12.2s,v27.2s
- add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
- umlal v22.2d,v10.2s,v3.s[0]
- extr x8,x12,x8,#52
- umlal v23.2d,v10.2s,v5.s[0]
- extr x9,x13,x9,#52
- umlal v19.2d,v10.2s,v8.s[0]
- add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
- umlal v21.2d,v10.2s,v1.s[0]
- fmov d9,x4
- umlal v20.2d,v10.2s,v0.s[0]
- and x8,x8,#0x03ffffff
-
- add v13.2s,v13.2s,v28.2s
- and x9,x9,#0x03ffffff
- umlal v22.2d,v12.2s,v0.s[0]
- ubfx x10,x12,#14,#26
- umlal v19.2d,v12.2s,v4.s[0]
- ubfx x11,x13,#14,#26
- umlal v23.2d,v12.2s,v1.s[0]
- add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
- umlal v20.2d,v12.2s,v6.s[0]
- fmov d10,x6
- umlal v21.2d,v12.2s,v8.s[0]
- add x12,x3,x12,lsr#40
-
- umlal v22.2d,v13.2s,v8.s[0]
- add x13,x3,x13,lsr#40
- umlal v19.2d,v13.2s,v2.s[0]
- add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
- umlal v23.2d,v13.2s,v0.s[0]
- add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
- umlal v20.2d,v13.2s,v4.s[0]
- fmov d11,x8
- umlal v21.2d,v13.2s,v6.s[0]
- fmov d12,x10
- fmov d13,x12
-
- /////////////////////////////////////////////////////////////////
- // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
- // and P. Schwabe
- //
- // [see discussion in poly1305-armv4 module]
-
- ushr v29.2d,v22.2d,#26
- xtn v27.2s,v22.2d
- ushr v30.2d,v19.2d,#26
- and v19.16b,v19.16b,v31.16b
- add v23.2d,v23.2d,v29.2d // h3 -> h4
- bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
- add v20.2d,v20.2d,v30.2d // h0 -> h1
-
- ushr v29.2d,v23.2d,#26
- xtn v28.2s,v23.2d
- ushr v30.2d,v20.2d,#26
- xtn v25.2s,v20.2d
- bic v28.2s,#0xfc,lsl#24
- add v21.2d,v21.2d,v30.2d // h1 -> h2
-
- add v19.2d,v19.2d,v29.2d
- shl v29.2d,v29.2d,#2
- shrn v30.2s,v21.2d,#26
- xtn v26.2s,v21.2d
- add v19.2d,v19.2d,v29.2d // h4 -> h0
- bic v25.2s,#0xfc,lsl#24
- add v27.2s,v27.2s,v30.2s // h2 -> h3
- bic v26.2s,#0xfc,lsl#24
-
- shrn v29.2s,v19.2d,#26
- xtn v24.2s,v19.2d
- ushr v30.2s,v27.2s,#26
- bic v27.2s,#0xfc,lsl#24
- bic v24.2s,#0xfc,lsl#24
- add v25.2s,v25.2s,v29.2s // h0 -> h1
- add v28.2s,v28.2s,v30.2s // h3 -> h4
-
- b.hi .Loop_neon
-
-.Lskip_loop:
- dup v16.2d,v16.d[0]
- add v11.2s,v11.2s,v26.2s
-
- ////////////////////////////////////////////////////////////////
- // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
-
- adds x2,x2,#32
- b.ne .Long_tail
-
- dup v16.2d,v11.d[0]
- add v14.2s,v9.2s,v24.2s
- add v17.2s,v12.2s,v27.2s
- add v15.2s,v10.2s,v25.2s
- add v18.2s,v13.2s,v28.2s
-
-.Long_tail:
- dup v14.2d,v14.d[0]
- umull2 v19.2d,v16.4s,v6.4s
- umull2 v22.2d,v16.4s,v1.4s
- umull2 v23.2d,v16.4s,v3.4s
- umull2 v21.2d,v16.4s,v0.4s
- umull2 v20.2d,v16.4s,v8.4s
-
- dup v15.2d,v15.d[0]
- umlal2 v19.2d,v14.4s,v0.4s
- umlal2 v21.2d,v14.4s,v3.4s
- umlal2 v22.2d,v14.4s,v5.4s
- umlal2 v23.2d,v14.4s,v7.4s
- umlal2 v20.2d,v14.4s,v1.4s
-
- dup v17.2d,v17.d[0]
- umlal2 v19.2d,v15.4s,v8.4s
- umlal2 v22.2d,v15.4s,v3.4s
- umlal2 v21.2d,v15.4s,v1.4s
- umlal2 v23.2d,v15.4s,v5.4s
- umlal2 v20.2d,v15.4s,v0.4s
-
- dup v18.2d,v18.d[0]
- umlal2 v22.2d,v17.4s,v0.4s
- umlal2 v23.2d,v17.4s,v1.4s
- umlal2 v19.2d,v17.4s,v4.4s
- umlal2 v20.2d,v17.4s,v6.4s
- umlal2 v21.2d,v17.4s,v8.4s
-
- umlal2 v22.2d,v18.4s,v8.4s
- umlal2 v19.2d,v18.4s,v2.4s
- umlal2 v23.2d,v18.4s,v0.4s
- umlal2 v20.2d,v18.4s,v4.4s
- umlal2 v21.2d,v18.4s,v6.4s
-
- b.eq .Lshort_tail
-
- ////////////////////////////////////////////////////////////////
- // (hash+inp[0:1])*r^4:r^3 and accumulate
-
- add v9.2s,v9.2s,v24.2s
- umlal v22.2d,v11.2s,v1.2s
- umlal v19.2d,v11.2s,v6.2s
- umlal v23.2d,v11.2s,v3.2s
- umlal v20.2d,v11.2s,v8.2s
- umlal v21.2d,v11.2s,v0.2s
-
- add v10.2s,v10.2s,v25.2s
- umlal v22.2d,v9.2s,v5.2s
- umlal v19.2d,v9.2s,v0.2s
- umlal v23.2d,v9.2s,v7.2s
- umlal v20.2d,v9.2s,v1.2s
- umlal v21.2d,v9.2s,v3.2s
-
- add v12.2s,v12.2s,v27.2s
- umlal v22.2d,v10.2s,v3.2s
- umlal v19.2d,v10.2s,v8.2s
- umlal v23.2d,v10.2s,v5.2s
- umlal v20.2d,v10.2s,v0.2s
- umlal v21.2d,v10.2s,v1.2s
-
- add v13.2s,v13.2s,v28.2s
- umlal v22.2d,v12.2s,v0.2s
- umlal v19.2d,v12.2s,v4.2s
- umlal v23.2d,v12.2s,v1.2s
- umlal v20.2d,v12.2s,v6.2s
- umlal v21.2d,v12.2s,v8.2s
-
- umlal v22.2d,v13.2s,v8.2s
- umlal v19.2d,v13.2s,v2.2s
- umlal v23.2d,v13.2s,v0.2s
- umlal v20.2d,v13.2s,v4.2s
- umlal v21.2d,v13.2s,v6.2s
-
-.Lshort_tail:
- ////////////////////////////////////////////////////////////////
- // horizontal add
-
- addp v22.2d,v22.2d,v22.2d
- ldp d8,d9,[sp,#16] // meet ABI requirements
- addp v19.2d,v19.2d,v19.2d
- ldp d10,d11,[sp,#32]
- addp v23.2d,v23.2d,v23.2d
- ldp d12,d13,[sp,#48]
- addp v20.2d,v20.2d,v20.2d
- ldp d14,d15,[sp,#64]
- addp v21.2d,v21.2d,v21.2d
- ldr x30,[sp,#8]
-
- ////////////////////////////////////////////////////////////////
- // lazy reduction, but without narrowing
-
- ushr v29.2d,v22.2d,#26
- and v22.16b,v22.16b,v31.16b
- ushr v30.2d,v19.2d,#26
- and v19.16b,v19.16b,v31.16b
-
- add v23.2d,v23.2d,v29.2d // h3 -> h4
- add v20.2d,v20.2d,v30.2d // h0 -> h1
-
- ushr v29.2d,v23.2d,#26
- and v23.16b,v23.16b,v31.16b
- ushr v30.2d,v20.2d,#26
- and v20.16b,v20.16b,v31.16b
- add v21.2d,v21.2d,v30.2d // h1 -> h2
-
- add v19.2d,v19.2d,v29.2d
- shl v29.2d,v29.2d,#2
- ushr v30.2d,v21.2d,#26
- and v21.16b,v21.16b,v31.16b
- add v19.2d,v19.2d,v29.2d // h4 -> h0
- add v22.2d,v22.2d,v30.2d // h2 -> h3
-
- ushr v29.2d,v19.2d,#26
- and v19.16b,v19.16b,v31.16b
- ushr v30.2d,v22.2d,#26
- and v22.16b,v22.16b,v31.16b
- add v20.2d,v20.2d,v29.2d // h0 -> h1
- add v23.2d,v23.2d,v30.2d // h3 -> h4
-
- ////////////////////////////////////////////////////////////////
- // write the result, can be partially reduced
-
- st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
- mov x4,#1
- st1 {v23.s}[0],[x0]
- str x4,[x0,#8] // set is_base2_26
-
- ldr x29,[sp],#80
- .inst 0xd50323bf // autiasp
- ret
-.size poly1305_blocks_neon,.-poly1305_blocks_neon
-
-.align 5
-.Lzeros:
-.long 0,0,0,0,0,0,0,0
-.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm"
-.align 2
-#if !defined(__KERNEL__) && !defined(_WIN64)
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
-#endif
diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped
deleted file mode 100644
index 7c7ce2e3bad6..000000000000
--- a/arch/arm64/crypto/sha256-core.S_shipped
+++ /dev/null
@@ -1,2069 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// This code is taken from the OpenSSL project but the author (Andy Polyakov)
-// has relicensed it under the GPLv2. Therefore this program is free software;
-// you can redistribute it and/or modify it under the terms of the GNU General
-// Public License version 2 as published by the Free Software Foundation.
-//
-// The original headers, including the original license headers, are
-// included below for completeness.
-
-// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the OpenSSL license (the "License"). You may not use
-// this file except in compliance with the License. You can obtain a copy
-// in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-
-// ====================================================================
-// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// project. The module is, however, dual licensed under OpenSSL and
-// CRYPTOGAMS licenses depending on where you obtain it. For further
-// details see http://www.openssl.org/~appro/cryptogams/.
-// ====================================================================
-//
-// SHA256/512 for ARMv8.
-//
-// Performance in cycles per processed byte and improvement coefficient
-// over code generated with "default" compiler:
-//
-// SHA256-hw SHA256(*) SHA512
-// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
-// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
-// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
-// Denver 2.01 10.5 (+26%) 6.70 (+8%)
-// X-Gene 20.0 (+100%) 12.8 (+300%(***))
-// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
-//
-// (*) Software SHA256 results are of lesser relevance, presented
-// mostly for informational purposes.
-// (**) The result is a trade-off: it's possible to improve it by
-// 10% (or by 1 cycle per round), but at the cost of 20% loss
-// on Cortex-A53 (or by 4 cycles per round).
-// (***) Super-impressive coefficients over gcc-generated code are
-// indication of some compiler "pathology", most notably code
-// generated with -mgeneral-regs-only is significanty faster
-// and the gap is only 40-90%.
-//
-// October 2016.
-//
-// Originally it was reckoned that it makes no sense to implement NEON
-// version of SHA256 for 64-bit processors. This is because performance
-// improvement on most wide-spread Cortex-A5x processors was observed
-// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
-// observed that 32-bit NEON SHA256 performs significantly better than
-// 64-bit scalar version on *some* of the more recent processors. As
-// result 64-bit NEON version of SHA256 was added to provide best
-// all-round performance. For example it executes ~30% faster on X-Gene
-// and Mongoose. [For reference, NEON version of SHA512 is bound to
-// deliver much less improvement, likely *negative* on Cortex-A5x.
-// Which is why NEON support is limited to SHA256.]
-
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#endif
-
-.text
-
-.extern OPENSSL_armcap_P
-.globl sha256_block_data_order
-.type sha256_block_data_order,%function
-.align 6
-sha256_block_data_order:
-#ifndef __KERNEL__
-# ifdef __ILP32__
- ldrsw x16,.LOPENSSL_armcap_P
-# else
- ldr x16,.LOPENSSL_armcap_P
-# endif
- adr x17,.LOPENSSL_armcap_P
- add x16,x16,x17
- ldr w16,[x16]
- tst w16,#ARMV8_SHA256
- b.ne .Lv8_entry
- tst w16,#ARMV7_NEON
- b.ne .Lneon_entry
-#endif
- stp x29,x30,[sp,#-128]!
- add x29,sp,#0
-
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
- sub sp,sp,#4*4
-
- ldp w20,w21,[x0] // load context
- ldp w22,w23,[x0,#2*4]
- ldp w24,w25,[x0,#4*4]
- add x2,x1,x2,lsl#6 // end of input
- ldp w26,w27,[x0,#6*4]
- adr x30,.LK256
- stp x0,x2,[x29,#96]
-
-.Loop:
- ldp w3,w4,[x1],#2*4
- ldr w19,[x30],#4 // *K++
- eor w28,w21,w22 // magic seed
- str x1,[x29,#112]
-#ifndef __AARCH64EB__
- rev w3,w3 // 0
-#endif
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- eor w6,w24,w24,ror#14
- and w17,w25,w24
- bic w19,w26,w24
- add w27,w27,w3 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w6,ror#11 // Sigma1(e)
- ror w6,w20,#2
- add w27,w27,w17 // h+=Ch(e,f,g)
- eor w17,w20,w20,ror#9
- add w27,w27,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w23,w23,w27 // d+=h
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w6,w17,ror#13 // Sigma0(a)
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w27,w27,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w4,w4 // 1
-#endif
- ldp w5,w6,[x1],#2*4
- add w27,w27,w17 // h+=Sigma0(a)
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- eor w7,w23,w23,ror#14
- and w17,w24,w23
- bic w28,w25,w23
- add w26,w26,w4 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w7,ror#11 // Sigma1(e)
- ror w7,w27,#2
- add w26,w26,w17 // h+=Ch(e,f,g)
- eor w17,w27,w27,ror#9
- add w26,w26,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w22,w22,w26 // d+=h
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w7,w17,ror#13 // Sigma0(a)
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w26,w26,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w5,w5 // 2
-#endif
- add w26,w26,w17 // h+=Sigma0(a)
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- eor w8,w22,w22,ror#14
- and w17,w23,w22
- bic w19,w24,w22
- add w25,w25,w5 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w8,ror#11 // Sigma1(e)
- ror w8,w26,#2
- add w25,w25,w17 // h+=Ch(e,f,g)
- eor w17,w26,w26,ror#9
- add w25,w25,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w21,w21,w25 // d+=h
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w8,w17,ror#13 // Sigma0(a)
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w25,w25,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w6,w6 // 3
-#endif
- ldp w7,w8,[x1],#2*4
- add w25,w25,w17 // h+=Sigma0(a)
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- eor w9,w21,w21,ror#14
- and w17,w22,w21
- bic w28,w23,w21
- add w24,w24,w6 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w9,ror#11 // Sigma1(e)
- ror w9,w25,#2
- add w24,w24,w17 // h+=Ch(e,f,g)
- eor w17,w25,w25,ror#9
- add w24,w24,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w20,w20,w24 // d+=h
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w9,w17,ror#13 // Sigma0(a)
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w24,w24,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w7,w7 // 4
-#endif
- add w24,w24,w17 // h+=Sigma0(a)
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- eor w10,w20,w20,ror#14
- and w17,w21,w20
- bic w19,w22,w20
- add w23,w23,w7 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w10,ror#11 // Sigma1(e)
- ror w10,w24,#2
- add w23,w23,w17 // h+=Ch(e,f,g)
- eor w17,w24,w24,ror#9
- add w23,w23,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w27,w27,w23 // d+=h
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w10,w17,ror#13 // Sigma0(a)
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w23,w23,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w8,w8 // 5
-#endif
- ldp w9,w10,[x1],#2*4
- add w23,w23,w17 // h+=Sigma0(a)
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- eor w11,w27,w27,ror#14
- and w17,w20,w27
- bic w28,w21,w27
- add w22,w22,w8 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w11,ror#11 // Sigma1(e)
- ror w11,w23,#2
- add w22,w22,w17 // h+=Ch(e,f,g)
- eor w17,w23,w23,ror#9
- add w22,w22,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w26,w26,w22 // d+=h
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w11,w17,ror#13 // Sigma0(a)
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w22,w22,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w9,w9 // 6
-#endif
- add w22,w22,w17 // h+=Sigma0(a)
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- eor w12,w26,w26,ror#14
- and w17,w27,w26
- bic w19,w20,w26
- add w21,w21,w9 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w12,ror#11 // Sigma1(e)
- ror w12,w22,#2
- add w21,w21,w17 // h+=Ch(e,f,g)
- eor w17,w22,w22,ror#9
- add w21,w21,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w25,w25,w21 // d+=h
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w12,w17,ror#13 // Sigma0(a)
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w21,w21,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w10,w10 // 7
-#endif
- ldp w11,w12,[x1],#2*4
- add w21,w21,w17 // h+=Sigma0(a)
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- eor w13,w25,w25,ror#14
- and w17,w26,w25
- bic w28,w27,w25
- add w20,w20,w10 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w13,ror#11 // Sigma1(e)
- ror w13,w21,#2
- add w20,w20,w17 // h+=Ch(e,f,g)
- eor w17,w21,w21,ror#9
- add w20,w20,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w24,w24,w20 // d+=h
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w13,w17,ror#13 // Sigma0(a)
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w20,w20,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w11,w11 // 8
-#endif
- add w20,w20,w17 // h+=Sigma0(a)
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- eor w14,w24,w24,ror#14
- and w17,w25,w24
- bic w19,w26,w24
- add w27,w27,w11 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w14,ror#11 // Sigma1(e)
- ror w14,w20,#2
- add w27,w27,w17 // h+=Ch(e,f,g)
- eor w17,w20,w20,ror#9
- add w27,w27,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w23,w23,w27 // d+=h
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w14,w17,ror#13 // Sigma0(a)
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w27,w27,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w12,w12 // 9
-#endif
- ldp w13,w14,[x1],#2*4
- add w27,w27,w17 // h+=Sigma0(a)
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- eor w15,w23,w23,ror#14
- and w17,w24,w23
- bic w28,w25,w23
- add w26,w26,w12 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w15,ror#11 // Sigma1(e)
- ror w15,w27,#2
- add w26,w26,w17 // h+=Ch(e,f,g)
- eor w17,w27,w27,ror#9
- add w26,w26,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w22,w22,w26 // d+=h
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w15,w17,ror#13 // Sigma0(a)
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w26,w26,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w13,w13 // 10
-#endif
- add w26,w26,w17 // h+=Sigma0(a)
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- eor w0,w22,w22,ror#14
- and w17,w23,w22
- bic w19,w24,w22
- add w25,w25,w13 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w0,ror#11 // Sigma1(e)
- ror w0,w26,#2
- add w25,w25,w17 // h+=Ch(e,f,g)
- eor w17,w26,w26,ror#9
- add w25,w25,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w21,w21,w25 // d+=h
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w0,w17,ror#13 // Sigma0(a)
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w25,w25,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w14,w14 // 11
-#endif
- ldp w15,w0,[x1],#2*4
- add w25,w25,w17 // h+=Sigma0(a)
- str w6,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- eor w6,w21,w21,ror#14
- and w17,w22,w21
- bic w28,w23,w21
- add w24,w24,w14 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w6,ror#11 // Sigma1(e)
- ror w6,w25,#2
- add w24,w24,w17 // h+=Ch(e,f,g)
- eor w17,w25,w25,ror#9
- add w24,w24,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w20,w20,w24 // d+=h
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w6,w17,ror#13 // Sigma0(a)
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w24,w24,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w15,w15 // 12
-#endif
- add w24,w24,w17 // h+=Sigma0(a)
- str w7,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- eor w7,w20,w20,ror#14
- and w17,w21,w20
- bic w19,w22,w20
- add w23,w23,w15 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w7,ror#11 // Sigma1(e)
- ror w7,w24,#2
- add w23,w23,w17 // h+=Ch(e,f,g)
- eor w17,w24,w24,ror#9
- add w23,w23,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w27,w27,w23 // d+=h
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w7,w17,ror#13 // Sigma0(a)
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w23,w23,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w0,w0 // 13
-#endif
- ldp w1,w2,[x1]
- add w23,w23,w17 // h+=Sigma0(a)
- str w8,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- eor w8,w27,w27,ror#14
- and w17,w20,w27
- bic w28,w21,w27
- add w22,w22,w0 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w8,ror#11 // Sigma1(e)
- ror w8,w23,#2
- add w22,w22,w17 // h+=Ch(e,f,g)
- eor w17,w23,w23,ror#9
- add w22,w22,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w26,w26,w22 // d+=h
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w8,w17,ror#13 // Sigma0(a)
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w22,w22,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w1,w1 // 14
-#endif
- ldr w6,[sp,#12]
- add w22,w22,w17 // h+=Sigma0(a)
- str w9,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- eor w9,w26,w26,ror#14
- and w17,w27,w26
- bic w19,w20,w26
- add w21,w21,w1 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w9,ror#11 // Sigma1(e)
- ror w9,w22,#2
- add w21,w21,w17 // h+=Ch(e,f,g)
- eor w17,w22,w22,ror#9
- add w21,w21,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w25,w25,w21 // d+=h
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w9,w17,ror#13 // Sigma0(a)
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w21,w21,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w2,w2 // 15
-#endif
- ldr w7,[sp,#0]
- add w21,w21,w17 // h+=Sigma0(a)
- str w10,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w9,w4,#7
- and w17,w26,w25
- ror w8,w1,#17
- bic w28,w27,w25
- ror w10,w21,#2
- add w20,w20,w2 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w9,w9,w4,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w10,w10,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w8,w8,w1,ror#19
- eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w10,w21,ror#22 // Sigma0(a)
- eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
- add w3,w3,w12
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w3,w3,w9
- add w20,w20,w17 // h+=Sigma0(a)
- add w3,w3,w8
-.Loop_16_xx:
- ldr w8,[sp,#4]
- str w11,[sp,#0]
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- ror w10,w5,#7
- and w17,w25,w24
- ror w9,w2,#17
- bic w19,w26,w24
- ror w11,w20,#2
- add w27,w27,w3 // h+=X[i]
- eor w16,w16,w24,ror#11
- eor w10,w10,w5,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w24,ror#25 // Sigma1(e)
- eor w11,w11,w20,ror#13
- add w27,w27,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w9,w9,w2,ror#19
- eor w10,w10,w5,lsr#3 // sigma0(X[i+1])
- add w27,w27,w16 // h+=Sigma1(e)
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w11,w20,ror#22 // Sigma0(a)
- eor w9,w9,w2,lsr#10 // sigma1(X[i+14])
- add w4,w4,w13
- add w23,w23,w27 // d+=h
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w4,w4,w10
- add w27,w27,w17 // h+=Sigma0(a)
- add w4,w4,w9
- ldr w9,[sp,#8]
- str w12,[sp,#4]
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- ror w11,w6,#7
- and w17,w24,w23
- ror w10,w3,#17
- bic w28,w25,w23
- ror w12,w27,#2
- add w26,w26,w4 // h+=X[i]
- eor w16,w16,w23,ror#11
- eor w11,w11,w6,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w23,ror#25 // Sigma1(e)
- eor w12,w12,w27,ror#13
- add w26,w26,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w10,w10,w3,ror#19
- eor w11,w11,w6,lsr#3 // sigma0(X[i+1])
- add w26,w26,w16 // h+=Sigma1(e)
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w12,w27,ror#22 // Sigma0(a)
- eor w10,w10,w3,lsr#10 // sigma1(X[i+14])
- add w5,w5,w14
- add w22,w22,w26 // d+=h
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w5,w5,w11
- add w26,w26,w17 // h+=Sigma0(a)
- add w5,w5,w10
- ldr w10,[sp,#12]
- str w13,[sp,#8]
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- ror w12,w7,#7
- and w17,w23,w22
- ror w11,w4,#17
- bic w19,w24,w22
- ror w13,w26,#2
- add w25,w25,w5 // h+=X[i]
- eor w16,w16,w22,ror#11
- eor w12,w12,w7,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w22,ror#25 // Sigma1(e)
- eor w13,w13,w26,ror#13
- add w25,w25,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w11,w11,w4,ror#19
- eor w12,w12,w7,lsr#3 // sigma0(X[i+1])
- add w25,w25,w16 // h+=Sigma1(e)
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w13,w26,ror#22 // Sigma0(a)
- eor w11,w11,w4,lsr#10 // sigma1(X[i+14])
- add w6,w6,w15
- add w21,w21,w25 // d+=h
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w6,w6,w12
- add w25,w25,w17 // h+=Sigma0(a)
- add w6,w6,w11
- ldr w11,[sp,#0]
- str w14,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- ror w13,w8,#7
- and w17,w22,w21
- ror w12,w5,#17
- bic w28,w23,w21
- ror w14,w25,#2
- add w24,w24,w6 // h+=X[i]
- eor w16,w16,w21,ror#11
- eor w13,w13,w8,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w21,ror#25 // Sigma1(e)
- eor w14,w14,w25,ror#13
- add w24,w24,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w12,w12,w5,ror#19
- eor w13,w13,w8,lsr#3 // sigma0(X[i+1])
- add w24,w24,w16 // h+=Sigma1(e)
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w14,w25,ror#22 // Sigma0(a)
- eor w12,w12,w5,lsr#10 // sigma1(X[i+14])
- add w7,w7,w0
- add w20,w20,w24 // d+=h
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w7,w7,w13
- add w24,w24,w17 // h+=Sigma0(a)
- add w7,w7,w12
- ldr w12,[sp,#4]
- str w15,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- ror w14,w9,#7
- and w17,w21,w20
- ror w13,w6,#17
- bic w19,w22,w20
- ror w15,w24,#2
- add w23,w23,w7 // h+=X[i]
- eor w16,w16,w20,ror#11
- eor w14,w14,w9,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w20,ror#25 // Sigma1(e)
- eor w15,w15,w24,ror#13
- add w23,w23,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w13,w13,w6,ror#19
- eor w14,w14,w9,lsr#3 // sigma0(X[i+1])
- add w23,w23,w16 // h+=Sigma1(e)
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w15,w24,ror#22 // Sigma0(a)
- eor w13,w13,w6,lsr#10 // sigma1(X[i+14])
- add w8,w8,w1
- add w27,w27,w23 // d+=h
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w8,w8,w14
- add w23,w23,w17 // h+=Sigma0(a)
- add w8,w8,w13
- ldr w13,[sp,#8]
- str w0,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- ror w15,w10,#7
- and w17,w20,w27
- ror w14,w7,#17
- bic w28,w21,w27
- ror w0,w23,#2
- add w22,w22,w8 // h+=X[i]
- eor w16,w16,w27,ror#11
- eor w15,w15,w10,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w27,ror#25 // Sigma1(e)
- eor w0,w0,w23,ror#13
- add w22,w22,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w14,w14,w7,ror#19
- eor w15,w15,w10,lsr#3 // sigma0(X[i+1])
- add w22,w22,w16 // h+=Sigma1(e)
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w0,w23,ror#22 // Sigma0(a)
- eor w14,w14,w7,lsr#10 // sigma1(X[i+14])
- add w9,w9,w2
- add w26,w26,w22 // d+=h
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w9,w9,w15
- add w22,w22,w17 // h+=Sigma0(a)
- add w9,w9,w14
- ldr w14,[sp,#12]
- str w1,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- ror w0,w11,#7
- and w17,w27,w26
- ror w15,w8,#17
- bic w19,w20,w26
- ror w1,w22,#2
- add w21,w21,w9 // h+=X[i]
- eor w16,w16,w26,ror#11
- eor w0,w0,w11,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w26,ror#25 // Sigma1(e)
- eor w1,w1,w22,ror#13
- add w21,w21,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w15,w15,w8,ror#19
- eor w0,w0,w11,lsr#3 // sigma0(X[i+1])
- add w21,w21,w16 // h+=Sigma1(e)
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w1,w22,ror#22 // Sigma0(a)
- eor w15,w15,w8,lsr#10 // sigma1(X[i+14])
- add w10,w10,w3
- add w25,w25,w21 // d+=h
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w10,w10,w0
- add w21,w21,w17 // h+=Sigma0(a)
- add w10,w10,w15
- ldr w15,[sp,#0]
- str w2,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w1,w12,#7
- and w17,w26,w25
- ror w0,w9,#17
- bic w28,w27,w25
- ror w2,w21,#2
- add w20,w20,w10 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w1,w1,w12,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w2,w2,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w0,w0,w9,ror#19
- eor w1,w1,w12,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w2,w21,ror#22 // Sigma0(a)
- eor w0,w0,w9,lsr#10 // sigma1(X[i+14])
- add w11,w11,w4
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w11,w11,w1
- add w20,w20,w17 // h+=Sigma0(a)
- add w11,w11,w0
- ldr w0,[sp,#4]
- str w3,[sp,#0]
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- ror w2,w13,#7
- and w17,w25,w24
- ror w1,w10,#17
- bic w19,w26,w24
- ror w3,w20,#2
- add w27,w27,w11 // h+=X[i]
- eor w16,w16,w24,ror#11
- eor w2,w2,w13,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w24,ror#25 // Sigma1(e)
- eor w3,w3,w20,ror#13
- add w27,w27,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w1,w1,w10,ror#19
- eor w2,w2,w13,lsr#3 // sigma0(X[i+1])
- add w27,w27,w16 // h+=Sigma1(e)
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w3,w20,ror#22 // Sigma0(a)
- eor w1,w1,w10,lsr#10 // sigma1(X[i+14])
- add w12,w12,w5
- add w23,w23,w27 // d+=h
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w12,w12,w2
- add w27,w27,w17 // h+=Sigma0(a)
- add w12,w12,w1
- ldr w1,[sp,#8]
- str w4,[sp,#4]
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- ror w3,w14,#7
- and w17,w24,w23
- ror w2,w11,#17
- bic w28,w25,w23
- ror w4,w27,#2
- add w26,w26,w12 // h+=X[i]
- eor w16,w16,w23,ror#11
- eor w3,w3,w14,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w23,ror#25 // Sigma1(e)
- eor w4,w4,w27,ror#13
- add w26,w26,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w2,w2,w11,ror#19
- eor w3,w3,w14,lsr#3 // sigma0(X[i+1])
- add w26,w26,w16 // h+=Sigma1(e)
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w4,w27,ror#22 // Sigma0(a)
- eor w2,w2,w11,lsr#10 // sigma1(X[i+14])
- add w13,w13,w6
- add w22,w22,w26 // d+=h
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w13,w13,w3
- add w26,w26,w17 // h+=Sigma0(a)
- add w13,w13,w2
- ldr w2,[sp,#12]
- str w5,[sp,#8]
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- ror w4,w15,#7
- and w17,w23,w22
- ror w3,w12,#17
- bic w19,w24,w22
- ror w5,w26,#2
- add w25,w25,w13 // h+=X[i]
- eor w16,w16,w22,ror#11
- eor w4,w4,w15,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w22,ror#25 // Sigma1(e)
- eor w5,w5,w26,ror#13
- add w25,w25,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w3,w3,w12,ror#19
- eor w4,w4,w15,lsr#3 // sigma0(X[i+1])
- add w25,w25,w16 // h+=Sigma1(e)
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w5,w26,ror#22 // Sigma0(a)
- eor w3,w3,w12,lsr#10 // sigma1(X[i+14])
- add w14,w14,w7
- add w21,w21,w25 // d+=h
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w14,w14,w4
- add w25,w25,w17 // h+=Sigma0(a)
- add w14,w14,w3
- ldr w3,[sp,#0]
- str w6,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- ror w5,w0,#7
- and w17,w22,w21
- ror w4,w13,#17
- bic w28,w23,w21
- ror w6,w25,#2
- add w24,w24,w14 // h+=X[i]
- eor w16,w16,w21,ror#11
- eor w5,w5,w0,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w21,ror#25 // Sigma1(e)
- eor w6,w6,w25,ror#13
- add w24,w24,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w4,w4,w13,ror#19
- eor w5,w5,w0,lsr#3 // sigma0(X[i+1])
- add w24,w24,w16 // h+=Sigma1(e)
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w6,w25,ror#22 // Sigma0(a)
- eor w4,w4,w13,lsr#10 // sigma1(X[i+14])
- add w15,w15,w8
- add w20,w20,w24 // d+=h
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w15,w15,w5
- add w24,w24,w17 // h+=Sigma0(a)
- add w15,w15,w4
- ldr w4,[sp,#4]
- str w7,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- ror w6,w1,#7
- and w17,w21,w20
- ror w5,w14,#17
- bic w19,w22,w20
- ror w7,w24,#2
- add w23,w23,w15 // h+=X[i]
- eor w16,w16,w20,ror#11
- eor w6,w6,w1,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w20,ror#25 // Sigma1(e)
- eor w7,w7,w24,ror#13
- add w23,w23,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w5,w5,w14,ror#19
- eor w6,w6,w1,lsr#3 // sigma0(X[i+1])
- add w23,w23,w16 // h+=Sigma1(e)
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w7,w24,ror#22 // Sigma0(a)
- eor w5,w5,w14,lsr#10 // sigma1(X[i+14])
- add w0,w0,w9
- add w27,w27,w23 // d+=h
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w0,w0,w6
- add w23,w23,w17 // h+=Sigma0(a)
- add w0,w0,w5
- ldr w5,[sp,#8]
- str w8,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- ror w7,w2,#7
- and w17,w20,w27
- ror w6,w15,#17
- bic w28,w21,w27
- ror w8,w23,#2
- add w22,w22,w0 // h+=X[i]
- eor w16,w16,w27,ror#11
- eor w7,w7,w2,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w27,ror#25 // Sigma1(e)
- eor w8,w8,w23,ror#13
- add w22,w22,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w6,w6,w15,ror#19
- eor w7,w7,w2,lsr#3 // sigma0(X[i+1])
- add w22,w22,w16 // h+=Sigma1(e)
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w8,w23,ror#22 // Sigma0(a)
- eor w6,w6,w15,lsr#10 // sigma1(X[i+14])
- add w1,w1,w10
- add w26,w26,w22 // d+=h
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w1,w1,w7
- add w22,w22,w17 // h+=Sigma0(a)
- add w1,w1,w6
- ldr w6,[sp,#12]
- str w9,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- ror w8,w3,#7
- and w17,w27,w26
- ror w7,w0,#17
- bic w19,w20,w26
- ror w9,w22,#2
- add w21,w21,w1 // h+=X[i]
- eor w16,w16,w26,ror#11
- eor w8,w8,w3,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w26,ror#25 // Sigma1(e)
- eor w9,w9,w22,ror#13
- add w21,w21,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w7,w7,w0,ror#19
- eor w8,w8,w3,lsr#3 // sigma0(X[i+1])
- add w21,w21,w16 // h+=Sigma1(e)
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w9,w22,ror#22 // Sigma0(a)
- eor w7,w7,w0,lsr#10 // sigma1(X[i+14])
- add w2,w2,w11
- add w25,w25,w21 // d+=h
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w2,w2,w8
- add w21,w21,w17 // h+=Sigma0(a)
- add w2,w2,w7
- ldr w7,[sp,#0]
- str w10,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w9,w4,#7
- and w17,w26,w25
- ror w8,w1,#17
- bic w28,w27,w25
- ror w10,w21,#2
- add w20,w20,w2 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w9,w9,w4,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w10,w10,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w8,w8,w1,ror#19
- eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w10,w21,ror#22 // Sigma0(a)
- eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
- add w3,w3,w12
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w3,w3,w9
- add w20,w20,w17 // h+=Sigma0(a)
- add w3,w3,w8
- cbnz w19,.Loop_16_xx
-
- ldp x0,x2,[x29,#96]
- ldr x1,[x29,#112]
- sub x30,x30,#260 // rewind
-
- ldp w3,w4,[x0]
- ldp w5,w6,[x0,#2*4]
- add x1,x1,#14*4 // advance input pointer
- ldp w7,w8,[x0,#4*4]
- add w20,w20,w3
- ldp w9,w10,[x0,#6*4]
- add w21,w21,w4
- add w22,w22,w5
- add w23,w23,w6
- stp w20,w21,[x0]
- add w24,w24,w7
- add w25,w25,w8
- stp w22,w23,[x0,#2*4]
- add w26,w26,w9
- add w27,w27,w10
- cmp x1,x2
- stp w24,w25,[x0,#4*4]
- stp w26,w27,[x0,#6*4]
- b.ne .Loop
-
- ldp x19,x20,[x29,#16]
- add sp,sp,#4*4
- ldp x21,x22,[x29,#32]
- ldp x23,x24,[x29,#48]
- ldp x25,x26,[x29,#64]
- ldp x27,x28,[x29,#80]
- ldp x29,x30,[sp],#128
- ret
-.size sha256_block_data_order,.-sha256_block_data_order
-
-.align 6
-.type .LK256,%object
-.LK256:
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
- .long 0 //terminator
-.size .LK256,.-.LK256
-#ifndef __KERNEL__
-.align 3
-.LOPENSSL_armcap_P:
-# ifdef __ILP32__
- .long OPENSSL_armcap_P-.
-# else
- .quad OPENSSL_armcap_P-.
-# endif
-#endif
-.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-#ifndef __KERNEL__
-.type sha256_block_armv8,%function
-.align 6
-sha256_block_armv8:
-.Lv8_entry:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
-
- ld1 {v0.4s,v1.4s},[x0]
- adr x3,.LK256
-
-.Loop_hw:
- ld1 {v4.16b-v7.16b},[x1],#64
- sub x2,x2,#1
- ld1 {v16.4s},[x3],#16
- rev32 v4.16b,v4.16b
- rev32 v5.16b,v5.16b
- rev32 v6.16b,v6.16b
- rev32 v7.16b,v7.16b
- orr v18.16b,v0.16b,v0.16b // offload
- orr v19.16b,v1.16b,v1.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
-
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
-
- ld1 {v17.4s},[x3]
- add v16.4s,v16.4s,v6.4s
- sub x3,x3,#64*4-16 // rewind
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
-
- add v17.4s,v17.4s,v7.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
-
- add v0.4s,v0.4s,v18.4s
- add v1.4s,v1.4s,v19.4s
-
- cbnz x2,.Loop_hw
-
- st1 {v0.4s,v1.4s},[x0]
-
- ldr x29,[sp],#16
- ret
-.size sha256_block_armv8,.-sha256_block_armv8
-#endif
-#ifdef __KERNEL__
-.globl sha256_block_neon
-#endif
-.type sha256_block_neon,%function
-.align 4
-sha256_block_neon:
-.Lneon_entry:
- stp x29, x30, [sp, #-16]!
- mov x29, sp
- sub sp,sp,#16*4
-
- adr x16,.LK256
- add x2,x1,x2,lsl#6 // len to point at the end of inp
-
- ld1 {v0.16b},[x1], #16
- ld1 {v1.16b},[x1], #16
- ld1 {v2.16b},[x1], #16
- ld1 {v3.16b},[x1], #16
- ld1 {v4.4s},[x16], #16
- ld1 {v5.4s},[x16], #16
- ld1 {v6.4s},[x16], #16
- ld1 {v7.4s},[x16], #16
- rev32 v0.16b,v0.16b // yes, even on
- rev32 v1.16b,v1.16b // big-endian
- rev32 v2.16b,v2.16b
- rev32 v3.16b,v3.16b
- mov x17,sp
- add v4.4s,v4.4s,v0.4s
- add v5.4s,v5.4s,v1.4s
- add v6.4s,v6.4s,v2.4s
- st1 {v4.4s-v5.4s},[x17], #32
- add v7.4s,v7.4s,v3.4s
- st1 {v6.4s-v7.4s},[x17]
- sub x17,x17,#32
-
- ldp w3,w4,[x0]
- ldp w5,w6,[x0,#8]
- ldp w7,w8,[x0,#16]
- ldp w9,w10,[x0,#24]
- ldr w12,[sp,#0]
- mov w13,wzr
- eor w14,w4,w5
- mov w15,wzr
- b .L_00_48
-
-.align 4
-.L_00_48:
- ext v4.16b,v0.16b,v1.16b,#4
- add w10,w10,w12
- add w3,w3,w15
- and w12,w8,w7
- bic w15,w9,w7
- ext v7.16b,v2.16b,v3.16b,#4
- eor w11,w7,w7,ror#5
- add w3,w3,w13
- mov d19,v3.d[1]
- orr w12,w12,w15
- eor w11,w11,w7,ror#19
- ushr v6.4s,v4.4s,#7
- eor w15,w3,w3,ror#11
- ushr v5.4s,v4.4s,#3
- add w10,w10,w12
- add v0.4s,v0.4s,v7.4s
- ror w11,w11,#6
- sli v6.4s,v4.4s,#25
- eor w13,w3,w4
- eor w15,w15,w3,ror#20
- ushr v7.4s,v4.4s,#18
- add w10,w10,w11
- ldr w12,[sp,#4]
- and w14,w14,w13
- eor v5.16b,v5.16b,v6.16b
- ror w15,w15,#2
- add w6,w6,w10
- sli v7.4s,v4.4s,#14
- eor w14,w14,w4
- ushr v16.4s,v19.4s,#17
- add w9,w9,w12
- add w10,w10,w15
- and w12,w7,w6
- eor v5.16b,v5.16b,v7.16b
- bic w15,w8,w6
- eor w11,w6,w6,ror#5
- sli v16.4s,v19.4s,#15
- add w10,w10,w14
- orr w12,w12,w15
- ushr v17.4s,v19.4s,#10
- eor w11,w11,w6,ror#19
- eor w15,w10,w10,ror#11
- ushr v7.4s,v19.4s,#19
- add w9,w9,w12
- ror w11,w11,#6
- add v0.4s,v0.4s,v5.4s
- eor w14,w10,w3
- eor w15,w15,w10,ror#20
- sli v7.4s,v19.4s,#13
- add w9,w9,w11
- ldr w12,[sp,#8]
- and w13,w13,w14
- eor v17.16b,v17.16b,v16.16b
- ror w15,w15,#2
- add w5,w5,w9
- eor w13,w13,w3
- eor v17.16b,v17.16b,v7.16b
- add w8,w8,w12
- add w9,w9,w15
- and w12,w6,w5
- add v0.4s,v0.4s,v17.4s
- bic w15,w7,w5
- eor w11,w5,w5,ror#5
- add w9,w9,w13
- ushr v18.4s,v0.4s,#17
- orr w12,w12,w15
- ushr v19.4s,v0.4s,#10
- eor w11,w11,w5,ror#19
- eor w15,w9,w9,ror#11
- sli v18.4s,v0.4s,#15
- add w8,w8,w12
- ushr v17.4s,v0.4s,#19
- ror w11,w11,#6
- eor w13,w9,w10
- eor v19.16b,v19.16b,v18.16b
- eor w15,w15,w9,ror#20
- add w8,w8,w11
- sli v17.4s,v0.4s,#13
- ldr w12,[sp,#12]
- and w14,w14,w13
- ror w15,w15,#2
- ld1 {v4.4s},[x16], #16
- add w4,w4,w8
- eor v19.16b,v19.16b,v17.16b
- eor w14,w14,w10
- eor v17.16b,v17.16b,v17.16b
- add w7,w7,w12
- add w8,w8,w15
- and w12,w5,w4
- mov v17.d[1],v19.d[0]
- bic w15,w6,w4
- eor w11,w4,w4,ror#5
- add w8,w8,w14
- add v0.4s,v0.4s,v17.4s
- orr w12,w12,w15
- eor w11,w11,w4,ror#19
- eor w15,w8,w8,ror#11
- add v4.4s,v4.4s,v0.4s
- add w7,w7,w12
- ror w11,w11,#6
- eor w14,w8,w9
- eor w15,w15,w8,ror#20
- add w7,w7,w11
- ldr w12,[sp,#16]
- and w13,w13,w14
- ror w15,w15,#2
- add w3,w3,w7
- eor w13,w13,w9
- st1 {v4.4s},[x17], #16
- ext v4.16b,v1.16b,v2.16b,#4
- add w6,w6,w12
- add w7,w7,w15
- and w12,w4,w3
- bic w15,w5,w3
- ext v7.16b,v3.16b,v0.16b,#4
- eor w11,w3,w3,ror#5
- add w7,w7,w13
- mov d19,v0.d[1]
- orr w12,w12,w15
- eor w11,w11,w3,ror#19
- ushr v6.4s,v4.4s,#7
- eor w15,w7,w7,ror#11
- ushr v5.4s,v4.4s,#3
- add w6,w6,w12
- add v1.4s,v1.4s,v7.4s
- ror w11,w11,#6
- sli v6.4s,v4.4s,#25
- eor w13,w7,w8
- eor w15,w15,w7,ror#20
- ushr v7.4s,v4.4s,#18
- add w6,w6,w11
- ldr w12,[sp,#20]
- and w14,w14,w13
- eor v5.16b,v5.16b,v6.16b
- ror w15,w15,#2
- add w10,w10,w6
- sli v7.4s,v4.4s,#14
- eor w14,w14,w8
- ushr v16.4s,v19.4s,#17
- add w5,w5,w12
- add w6,w6,w15
- and w12,w3,w10
- eor v5.16b,v5.16b,v7.16b
- bic w15,w4,w10
- eor w11,w10,w10,ror#5
- sli v16.4s,v19.4s,#15
- add w6,w6,w14
- orr w12,w12,w15
- ushr v17.4s,v19.4s,#10
- eor w11,w11,w10,ror#19
- eor w15,w6,w6,ror#11
- ushr v7.4s,v19.4s,#19
- add w5,w5,w12
- ror w11,w11,#6
- add v1.4s,v1.4s,v5.4s
- eor w14,w6,w7
- eor w15,w15,w6,ror#20
- sli v7.4s,v19.4s,#13
- add w5,w5,w11
- ldr w12,[sp,#24]
- and w13,w13,w14
- eor v17.16b,v17.16b,v16.16b
- ror w15,w15,#2
- add w9,w9,w5
- eor w13,w13,w7
- eor v17.16b,v17.16b,v7.16b
- add w4,w4,w12
- add w5,w5,w15
- and w12,w10,w9
- add v1.4s,v1.4s,v17.4s
- bic w15,w3,w9
- eor w11,w9,w9,ror#5
- add w5,w5,w13
- ushr v18.4s,v1.4s,#17
- orr w12,w12,w15
- ushr v19.4s,v1.4s,#10
- eor w11,w11,w9,ror#19
- eor w15,w5,w5,ror#11
- sli v18.4s,v1.4s,#15
- add w4,w4,w12
- ushr v17.4s,v1.4s,#19
- ror w11,w11,#6
- eor w13,w5,w6
- eor v19.16b,v19.16b,v18.16b
- eor w15,w15,w5,ror#20
- add w4,w4,w11
- sli v17.4s,v1.4s,#13
- ldr w12,[sp,#28]
- and w14,w14,w13
- ror w15,w15,#2
- ld1 {v4.4s},[x16], #16
- add w8,w8,w4
- eor v19.16b,v19.16b,v17.16b
- eor w14,w14,w6
- eor v17.16b,v17.16b,v17.16b
- add w3,w3,w12
- add w4,w4,w15
- and w12,w9,w8
- mov v17.d[1],v19.d[0]
- bic w15,w10,w8
- eor w11,w8,w8,ror#5
- add w4,w4,w14
- add v1.4s,v1.4s,v17.4s
- orr w12,w12,w15
- eor w11,w11,w8,ror#19
- eor w15,w4,w4,ror#11
- add v4.4s,v4.4s,v1.4s
- add w3,w3,w12
- ror w11,w11,#6
- eor w14,w4,w5
- eor w15,w15,w4,ror#20
- add w3,w3,w11
- ldr w12,[sp,#32]
- and w13,w13,w14
- ror w15,w15,#2
- add w7,w7,w3
- eor w13,w13,w5
- st1 {v4.4s},[x17], #16
- ext v4.16b,v2.16b,v3.16b,#4
- add w10,w10,w12
- add w3,w3,w15
- and w12,w8,w7
- bic w15,w9,w7
- ext v7.16b,v0.16b,v1.16b,#4
- eor w11,w7,w7,ror#5
- add w3,w3,w13
- mov d19,v1.d[1]
- orr w12,w12,w15
- eor w11,w11,w7,ror#19
- ushr v6.4s,v4.4s,#7
- eor w15,w3,w3,ror#11
- ushr v5.4s,v4.4s,#3
- add w10,w10,w12
- add v2.4s,v2.4s,v7.4s
- ror w11,w11,#6
- sli v6.4s,v4.4s,#25
- eor w13,w3,w4
- eor w15,w15,w3,ror#20
- ushr v7.4s,v4.4s,#18
- add w10,w10,w11
- ldr w12,[sp,#36]
- and w14,w14,w13
- eor v5.16b,v5.16b,v6.16b
- ror w15,w15,#2
- add w6,w6,w10
- sli v7.4s,v4.4s,#14
- eor w14,w14,w4
- ushr v16.4s,v19.4s,#17
- add w9,w9,w12
- add w10,w10,w15
- and w12,w7,w6
- eor v5.16b,v5.16b,v7.16b
- bic w15,w8,w6
- eor w11,w6,w6,ror#5
- sli v16.4s,v19.4s,#15
- add w10,w10,w14
- orr w12,w12,w15
- ushr v17.4s,v19.4s,#10
- eor w11,w11,w6,ror#19
- eor w15,w10,w10,ror#11
- ushr v7.4s,v19.4s,#19
- add w9,w9,w12
- ror w11,w11,#6
- add v2.4s,v2.4s,v5.4s
- eor w14,w10,w3
- eor w15,w15,w10,ror#20
- sli v7.4s,v19.4s,#13
- add w9,w9,w11
- ldr w12,[sp,#40]
- and w13,w13,w14
- eor v17.16b,v17.16b,v16.16b
- ror w15,w15,#2
- add w5,w5,w9
- eor w13,w13,w3
- eor v17.16b,v17.16b,v7.16b
- add w8,w8,w12
- add w9,w9,w15
- and w12,w6,w5
- add v2.4s,v2.4s,v17.4s
- bic w15,w7,w5
- eor w11,w5,w5,ror#5
- add w9,w9,w13
- ushr v18.4s,v2.4s,#17
- orr w12,w12,w15
- ushr v19.4s,v2.4s,#10
- eor w11,w11,w5,ror#19
- eor w15,w9,w9,ror#11
- sli v18.4s,v2.4s,#15
- add w8,w8,w12
- ushr v17.4s,v2.4s,#19
- ror w11,w11,#6
- eor w13,w9,w10
- eor v19.16b,v19.16b,v18.16b
- eor w15,w15,w9,ror#20
- add w8,w8,w11
- sli v17.4s,v2.4s,#13
- ldr w12,[sp,#44]
- and w14,w14,w13
- ror w15,w15,#2
- ld1 {v4.4s},[x16], #16
- add w4,w4,w8
- eor v19.16b,v19.16b,v17.16b
- eor w14,w14,w10
- eor v17.16b,v17.16b,v17.16b
- add w7,w7,w12
- add w8,w8,w15
- and w12,w5,w4
- mov v17.d[1],v19.d[0]
- bic w15,w6,w4
- eor w11,w4,w4,ror#5
- add w8,w8,w14
- add v2.4s,v2.4s,v17.4s
- orr w12,w12,w15
- eor w11,w11,w4,ror#19
- eor w15,w8,w8,ror#11
- add v4.4s,v4.4s,v2.4s
- add w7,w7,w12
- ror w11,w11,#6
- eor w14,w8,w9
- eor w15,w15,w8,ror#20
- add w7,w7,w11
- ldr w12,[sp,#48]
- and w13,w13,w14
- ror w15,w15,#2
- add w3,w3,w7
- eor w13,w13,w9
- st1 {v4.4s},[x17], #16
- ext v4.16b,v3.16b,v0.16b,#4
- add w6,w6,w12
- add w7,w7,w15
- and w12,w4,w3
- bic w15,w5,w3
- ext v7.16b,v1.16b,v2.16b,#4
- eor w11,w3,w3,ror#5
- add w7,w7,w13
- mov d19,v2.d[1]
- orr w12,w12,w15
- eor w11,w11,w3,ror#19
- ushr v6.4s,v4.4s,#7
- eor w15,w7,w7,ror#11
- ushr v5.4s,v4.4s,#3
- add w6,w6,w12
- add v3.4s,v3.4s,v7.4s
- ror w11,w11,#6
- sli v6.4s,v4.4s,#25
- eor w13,w7,w8
- eor w15,w15,w7,ror#20
- ushr v7.4s,v4.4s,#18
- add w6,w6,w11
- ldr w12,[sp,#52]
- and w14,w14,w13
- eor v5.16b,v5.16b,v6.16b
- ror w15,w15,#2
- add w10,w10,w6
- sli v7.4s,v4.4s,#14
- eor w14,w14,w8
- ushr v16.4s,v19.4s,#17
- add w5,w5,w12
- add w6,w6,w15
- and w12,w3,w10
- eor v5.16b,v5.16b,v7.16b
- bic w15,w4,w10
- eor w11,w10,w10,ror#5
- sli v16.4s,v19.4s,#15
- add w6,w6,w14
- orr w12,w12,w15
- ushr v17.4s,v19.4s,#10
- eor w11,w11,w10,ror#19
- eor w15,w6,w6,ror#11
- ushr v7.4s,v19.4s,#19
- add w5,w5,w12
- ror w11,w11,#6
- add v3.4s,v3.4s,v5.4s
- eor w14,w6,w7
- eor w15,w15,w6,ror#20
- sli v7.4s,v19.4s,#13
- add w5,w5,w11
- ldr w12,[sp,#56]
- and w13,w13,w14
- eor v17.16b,v17.16b,v16.16b
- ror w15,w15,#2
- add w9,w9,w5
- eor w13,w13,w7
- eor v17.16b,v17.16b,v7.16b
- add w4,w4,w12
- add w5,w5,w15
- and w12,w10,w9
- add v3.4s,v3.4s,v17.4s
- bic w15,w3,w9
- eor w11,w9,w9,ror#5
- add w5,w5,w13
- ushr v18.4s,v3.4s,#17
- orr w12,w12,w15
- ushr v19.4s,v3.4s,#10
- eor w11,w11,w9,ror#19
- eor w15,w5,w5,ror#11
- sli v18.4s,v3.4s,#15
- add w4,w4,w12
- ushr v17.4s,v3.4s,#19
- ror w11,w11,#6
- eor w13,w5,w6
- eor v19.16b,v19.16b,v18.16b
- eor w15,w15,w5,ror#20
- add w4,w4,w11
- sli v17.4s,v3.4s,#13
- ldr w12,[sp,#60]
- and w14,w14,w13
- ror w15,w15,#2
- ld1 {v4.4s},[x16], #16
- add w8,w8,w4
- eor v19.16b,v19.16b,v17.16b
- eor w14,w14,w6
- eor v17.16b,v17.16b,v17.16b
- add w3,w3,w12
- add w4,w4,w15
- and w12,w9,w8
- mov v17.d[1],v19.d[0]
- bic w15,w10,w8
- eor w11,w8,w8,ror#5
- add w4,w4,w14
- add v3.4s,v3.4s,v17.4s
- orr w12,w12,w15
- eor w11,w11,w8,ror#19
- eor w15,w4,w4,ror#11
- add v4.4s,v4.4s,v3.4s
- add w3,w3,w12
- ror w11,w11,#6
- eor w14,w4,w5
- eor w15,w15,w4,ror#20
- add w3,w3,w11
- ldr w12,[x16]
- and w13,w13,w14
- ror w15,w15,#2
- add w7,w7,w3
- eor w13,w13,w5
- st1 {v4.4s},[x17], #16
- cmp w12,#0 // check for K256 terminator
- ldr w12,[sp,#0]
- sub x17,x17,#64
- bne .L_00_48
-
- sub x16,x16,#256 // rewind x16
- cmp x1,x2
- mov x17, #64
- csel x17, x17, xzr, eq
- sub x1,x1,x17 // avoid SEGV
- mov x17,sp
- add w10,w10,w12
- add w3,w3,w15
- and w12,w8,w7
- ld1 {v0.16b},[x1],#16
- bic w15,w9,w7
- eor w11,w7,w7,ror#5
- ld1 {v4.4s},[x16],#16
- add w3,w3,w13
- orr w12,w12,w15
- eor w11,w11,w7,ror#19
- eor w15,w3,w3,ror#11
- rev32 v0.16b,v0.16b
- add w10,w10,w12
- ror w11,w11,#6
- eor w13,w3,w4
- eor w15,w15,w3,ror#20
- add v4.4s,v4.4s,v0.4s
- add w10,w10,w11
- ldr w12,[sp,#4]
- and w14,w14,w13
- ror w15,w15,#2
- add w6,w6,w10
- eor w14,w14,w4
- add w9,w9,w12
- add w10,w10,w15
- and w12,w7,w6
- bic w15,w8,w6
- eor w11,w6,w6,ror#5
- add w10,w10,w14
- orr w12,w12,w15
- eor w11,w11,w6,ror#19
- eor w15,w10,w10,ror#11
- add w9,w9,w12
- ror w11,w11,#6
- eor w14,w10,w3
- eor w15,w15,w10,ror#20
- add w9,w9,w11
- ldr w12,[sp,#8]
- and w13,w13,w14
- ror w15,w15,#2
- add w5,w5,w9
- eor w13,w13,w3
- add w8,w8,w12
- add w9,w9,w15
- and w12,w6,w5
- bic w15,w7,w5
- eor w11,w5,w5,ror#5
- add w9,w9,w13
- orr w12,w12,w15
- eor w11,w11,w5,ror#19
- eor w15,w9,w9,ror#11
- add w8,w8,w12
- ror w11,w11,#6
- eor w13,w9,w10
- eor w15,w15,w9,ror#20
- add w8,w8,w11
- ldr w12,[sp,#12]
- and w14,w14,w13
- ror w15,w15,#2
- add w4,w4,w8
- eor w14,w14,w10
- add w7,w7,w12
- add w8,w8,w15
- and w12,w5,w4
- bic w15,w6,w4
- eor w11,w4,w4,ror#5
- add w8,w8,w14
- orr w12,w12,w15
- eor w11,w11,w4,ror#19
- eor w15,w8,w8,ror#11
- add w7,w7,w12
- ror w11,w11,#6
- eor w14,w8,w9
- eor w15,w15,w8,ror#20
- add w7,w7,w11
- ldr w12,[sp,#16]
- and w13,w13,w14
- ror w15,w15,#2
- add w3,w3,w7
- eor w13,w13,w9
- st1 {v4.4s},[x17], #16
- add w6,w6,w12
- add w7,w7,w15
- and w12,w4,w3
- ld1 {v1.16b},[x1],#16
- bic w15,w5,w3
- eor w11,w3,w3,ror#5
- ld1 {v4.4s},[x16],#16
- add w7,w7,w13
- orr w12,w12,w15
- eor w11,w11,w3,ror#19
- eor w15,w7,w7,ror#11
- rev32 v1.16b,v1.16b
- add w6,w6,w12
- ror w11,w11,#6
- eor w13,w7,w8
- eor w15,w15,w7,ror#20
- add v4.4s,v4.4s,v1.4s
- add w6,w6,w11
- ldr w12,[sp,#20]
- and w14,w14,w13
- ror w15,w15,#2
- add w10,w10,w6
- eor w14,w14,w8
- add w5,w5,w12
- add w6,w6,w15
- and w12,w3,w10
- bic w15,w4,w10
- eor w11,w10,w10,ror#5
- add w6,w6,w14
- orr w12,w12,w15
- eor w11,w11,w10,ror#19
- eor w15,w6,w6,ror#11
- add w5,w5,w12
- ror w11,w11,#6
- eor w14,w6,w7
- eor w15,w15,w6,ror#20
- add w5,w5,w11
- ldr w12,[sp,#24]
- and w13,w13,w14
- ror w15,w15,#2
- add w9,w9,w5
- eor w13,w13,w7
- add w4,w4,w12
- add w5,w5,w15
- and w12,w10,w9
- bic w15,w3,w9
- eor w11,w9,w9,ror#5
- add w5,w5,w13
- orr w12,w12,w15
- eor w11,w11,w9,ror#19
- eor w15,w5,w5,ror#11
- add w4,w4,w12
- ror w11,w11,#6
- eor w13,w5,w6
- eor w15,w15,w5,ror#20
- add w4,w4,w11
- ldr w12,[sp,#28]
- and w14,w14,w13
- ror w15,w15,#2
- add w8,w8,w4
- eor w14,w14,w6
- add w3,w3,w12
- add w4,w4,w15
- and w12,w9,w8
- bic w15,w10,w8
- eor w11,w8,w8,ror#5
- add w4,w4,w14
- orr w12,w12,w15
- eor w11,w11,w8,ror#19
- eor w15,w4,w4,ror#11
- add w3,w3,w12
- ror w11,w11,#6
- eor w14,w4,w5
- eor w15,w15,w4,ror#20
- add w3,w3,w11
- ldr w12,[sp,#32]
- and w13,w13,w14
- ror w15,w15,#2
- add w7,w7,w3
- eor w13,w13,w5
- st1 {v4.4s},[x17], #16
- add w10,w10,w12
- add w3,w3,w15
- and w12,w8,w7
- ld1 {v2.16b},[x1],#16
- bic w15,w9,w7
- eor w11,w7,w7,ror#5
- ld1 {v4.4s},[x16],#16
- add w3,w3,w13
- orr w12,w12,w15
- eor w11,w11,w7,ror#19
- eor w15,w3,w3,ror#11
- rev32 v2.16b,v2.16b
- add w10,w10,w12
- ror w11,w11,#6
- eor w13,w3,w4
- eor w15,w15,w3,ror#20
- add v4.4s,v4.4s,v2.4s
- add w10,w10,w11
- ldr w12,[sp,#36]
- and w14,w14,w13
- ror w15,w15,#2
- add w6,w6,w10
- eor w14,w14,w4
- add w9,w9,w12
- add w10,w10,w15
- and w12,w7,w6
- bic w15,w8,w6
- eor w11,w6,w6,ror#5
- add w10,w10,w14
- orr w12,w12,w15
- eor w11,w11,w6,ror#19
- eor w15,w10,w10,ror#11
- add w9,w9,w12
- ror w11,w11,#6
- eor w14,w10,w3
- eor w15,w15,w10,ror#20
- add w9,w9,w11
- ldr w12,[sp,#40]
- and w13,w13,w14
- ror w15,w15,#2
- add w5,w5,w9
- eor w13,w13,w3
- add w8,w8,w12
- add w9,w9,w15
- and w12,w6,w5
- bic w15,w7,w5
- eor w11,w5,w5,ror#5
- add w9,w9,w13
- orr w12,w12,w15
- eor w11,w11,w5,ror#19
- eor w15,w9,w9,ror#11
- add w8,w8,w12
- ror w11,w11,#6
- eor w13,w9,w10
- eor w15,w15,w9,ror#20
- add w8,w8,w11
- ldr w12,[sp,#44]
- and w14,w14,w13
- ror w15,w15,#2
- add w4,w4,w8
- eor w14,w14,w10
- add w7,w7,w12
- add w8,w8,w15
- and w12,w5,w4
- bic w15,w6,w4
- eor w11,w4,w4,ror#5
- add w8,w8,w14
- orr w12,w12,w15
- eor w11,w11,w4,ror#19
- eor w15,w8,w8,ror#11
- add w7,w7,w12
- ror w11,w11,#6
- eor w14,w8,w9
- eor w15,w15,w8,ror#20
- add w7,w7,w11
- ldr w12,[sp,#48]
- and w13,w13,w14
- ror w15,w15,#2
- add w3,w3,w7
- eor w13,w13,w9
- st1 {v4.4s},[x17], #16
- add w6,w6,w12
- add w7,w7,w15
- and w12,w4,w3
- ld1 {v3.16b},[x1],#16
- bic w15,w5,w3
- eor w11,w3,w3,ror#5
- ld1 {v4.4s},[x16],#16
- add w7,w7,w13
- orr w12,w12,w15
- eor w11,w11,w3,ror#19
- eor w15,w7,w7,ror#11
- rev32 v3.16b,v3.16b
- add w6,w6,w12
- ror w11,w11,#6
- eor w13,w7,w8
- eor w15,w15,w7,ror#20
- add v4.4s,v4.4s,v3.4s
- add w6,w6,w11
- ldr w12,[sp,#52]
- and w14,w14,w13
- ror w15,w15,#2
- add w10,w10,w6
- eor w14,w14,w8
- add w5,w5,w12
- add w6,w6,w15
- and w12,w3,w10
- bic w15,w4,w10
- eor w11,w10,w10,ror#5
- add w6,w6,w14
- orr w12,w12,w15
- eor w11,w11,w10,ror#19
- eor w15,w6,w6,ror#11
- add w5,w5,w12
- ror w11,w11,#6
- eor w14,w6,w7
- eor w15,w15,w6,ror#20
- add w5,w5,w11
- ldr w12,[sp,#56]
- and w13,w13,w14
- ror w15,w15,#2
- add w9,w9,w5
- eor w13,w13,w7
- add w4,w4,w12
- add w5,w5,w15
- and w12,w10,w9
- bic w15,w3,w9
- eor w11,w9,w9,ror#5
- add w5,w5,w13
- orr w12,w12,w15
- eor w11,w11,w9,ror#19
- eor w15,w5,w5,ror#11
- add w4,w4,w12
- ror w11,w11,#6
- eor w13,w5,w6
- eor w15,w15,w5,ror#20
- add w4,w4,w11
- ldr w12,[sp,#60]
- and w14,w14,w13
- ror w15,w15,#2
- add w8,w8,w4
- eor w14,w14,w6
- add w3,w3,w12
- add w4,w4,w15
- and w12,w9,w8
- bic w15,w10,w8
- eor w11,w8,w8,ror#5
- add w4,w4,w14
- orr w12,w12,w15
- eor w11,w11,w8,ror#19
- eor w15,w4,w4,ror#11
- add w3,w3,w12
- ror w11,w11,#6
- eor w14,w4,w5
- eor w15,w15,w4,ror#20
- add w3,w3,w11
- and w13,w13,w14
- ror w15,w15,#2
- add w7,w7,w3
- eor w13,w13,w5
- st1 {v4.4s},[x17], #16
- add w3,w3,w15 // h+=Sigma0(a) from the past
- ldp w11,w12,[x0,#0]
- add w3,w3,w13 // h+=Maj(a,b,c) from the past
- ldp w13,w14,[x0,#8]
- add w3,w3,w11 // accumulate
- add w4,w4,w12
- ldp w11,w12,[x0,#16]
- add w5,w5,w13
- add w6,w6,w14
- ldp w13,w14,[x0,#24]
- add w7,w7,w11
- add w8,w8,w12
- ldr w12,[sp,#0]
- stp w3,w4,[x0,#0]
- add w9,w9,w13
- mov w13,wzr
- stp w5,w6,[x0,#8]
- add w10,w10,w14
- stp w7,w8,[x0,#16]
- eor w14,w4,w5
- stp w9,w10,[x0,#24]
- mov w15,wzr
- mov x17,sp
- b.ne .L_00_48
-
- ldr x29,[x29]
- add sp,sp,#16*4+16
- ret
-.size sha256_block_neon,.-sha256_block_neon
-#ifndef __KERNEL__
-.comm OPENSSL_armcap_P,4,4
-#endif
diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped
deleted file mode 100644
index e063a6106720..000000000000
--- a/arch/arm64/crypto/sha512-core.S_shipped
+++ /dev/null
@@ -1,1093 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// This code is taken from the OpenSSL project but the author (Andy Polyakov)
-// has relicensed it under the GPLv2. Therefore this program is free software;
-// you can redistribute it and/or modify it under the terms of the GNU General
-// Public License version 2 as published by the Free Software Foundation.
-//
-// The original headers, including the original license headers, are
-// included below for completeness.
-
-// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the OpenSSL license (the "License"). You may not use
-// this file except in compliance with the License. You can obtain a copy
-// in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-
-// ====================================================================
-// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// project. The module is, however, dual licensed under OpenSSL and
-// CRYPTOGAMS licenses depending on where you obtain it. For further
-// details see http://www.openssl.org/~appro/cryptogams/.
-// ====================================================================
-//
-// SHA256/512 for ARMv8.
-//
-// Performance in cycles per processed byte and improvement coefficient
-// over code generated with "default" compiler:
-//
-// SHA256-hw SHA256(*) SHA512
-// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
-// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
-// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
-// Denver 2.01 10.5 (+26%) 6.70 (+8%)
-// X-Gene 20.0 (+100%) 12.8 (+300%(***))
-// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
-//
-// (*) Software SHA256 results are of lesser relevance, presented
-// mostly for informational purposes.
-// (**) The result is a trade-off: it's possible to improve it by
-// 10% (or by 1 cycle per round), but at the cost of 20% loss
-// on Cortex-A53 (or by 4 cycles per round).
-// (***) Super-impressive coefficients over gcc-generated code are
-// indication of some compiler "pathology", most notably code
-// generated with -mgeneral-regs-only is significanty faster
-// and the gap is only 40-90%.
-//
-// October 2016.
-//
-// Originally it was reckoned that it makes no sense to implement NEON
-// version of SHA256 for 64-bit processors. This is because performance
-// improvement on most wide-spread Cortex-A5x processors was observed
-// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
-// observed that 32-bit NEON SHA256 performs significantly better than
-// 64-bit scalar version on *some* of the more recent processors. As
-// result 64-bit NEON version of SHA256 was added to provide best
-// all-round performance. For example it executes ~30% faster on X-Gene
-// and Mongoose. [For reference, NEON version of SHA512 is bound to
-// deliver much less improvement, likely *negative* on Cortex-A5x.
-// Which is why NEON support is limited to SHA256.]
-
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#endif
-
-.text
-
-.extern OPENSSL_armcap_P
-.globl sha512_block_data_order
-.type sha512_block_data_order,%function
-.align 6
-sha512_block_data_order:
- stp x29,x30,[sp,#-128]!
- add x29,sp,#0
-
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
- sub sp,sp,#4*8
-
- ldp x20,x21,[x0] // load context
- ldp x22,x23,[x0,#2*8]
- ldp x24,x25,[x0,#4*8]
- add x2,x1,x2,lsl#7 // end of input
- ldp x26,x27,[x0,#6*8]
- adr x30,.LK512
- stp x0,x2,[x29,#96]
-
-.Loop:
- ldp x3,x4,[x1],#2*8
- ldr x19,[x30],#8 // *K++
- eor x28,x21,x22 // magic seed
- str x1,[x29,#112]
-#ifndef __AARCH64EB__
- rev x3,x3 // 0
-#endif
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- eor x6,x24,x24,ror#23
- and x17,x25,x24
- bic x19,x26,x24
- add x27,x27,x3 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x6,ror#18 // Sigma1(e)
- ror x6,x20,#28
- add x27,x27,x17 // h+=Ch(e,f,g)
- eor x17,x20,x20,ror#5
- add x27,x27,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x23,x23,x27 // d+=h
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x6,x17,ror#34 // Sigma0(a)
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x27,x27,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x4,x4 // 1
-#endif
- ldp x5,x6,[x1],#2*8
- add x27,x27,x17 // h+=Sigma0(a)
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- eor x7,x23,x23,ror#23
- and x17,x24,x23
- bic x28,x25,x23
- add x26,x26,x4 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x7,ror#18 // Sigma1(e)
- ror x7,x27,#28
- add x26,x26,x17 // h+=Ch(e,f,g)
- eor x17,x27,x27,ror#5
- add x26,x26,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x22,x22,x26 // d+=h
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x7,x17,ror#34 // Sigma0(a)
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x26,x26,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x5,x5 // 2
-#endif
- add x26,x26,x17 // h+=Sigma0(a)
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- eor x8,x22,x22,ror#23
- and x17,x23,x22
- bic x19,x24,x22
- add x25,x25,x5 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x8,ror#18 // Sigma1(e)
- ror x8,x26,#28
- add x25,x25,x17 // h+=Ch(e,f,g)
- eor x17,x26,x26,ror#5
- add x25,x25,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x21,x21,x25 // d+=h
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x8,x17,ror#34 // Sigma0(a)
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x25,x25,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x6,x6 // 3
-#endif
- ldp x7,x8,[x1],#2*8
- add x25,x25,x17 // h+=Sigma0(a)
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- eor x9,x21,x21,ror#23
- and x17,x22,x21
- bic x28,x23,x21
- add x24,x24,x6 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x9,ror#18 // Sigma1(e)
- ror x9,x25,#28
- add x24,x24,x17 // h+=Ch(e,f,g)
- eor x17,x25,x25,ror#5
- add x24,x24,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x20,x20,x24 // d+=h
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x9,x17,ror#34 // Sigma0(a)
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x24,x24,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x7,x7 // 4
-#endif
- add x24,x24,x17 // h+=Sigma0(a)
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- eor x10,x20,x20,ror#23
- and x17,x21,x20
- bic x19,x22,x20
- add x23,x23,x7 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x10,ror#18 // Sigma1(e)
- ror x10,x24,#28
- add x23,x23,x17 // h+=Ch(e,f,g)
- eor x17,x24,x24,ror#5
- add x23,x23,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x27,x27,x23 // d+=h
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x10,x17,ror#34 // Sigma0(a)
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x23,x23,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x8,x8 // 5
-#endif
- ldp x9,x10,[x1],#2*8
- add x23,x23,x17 // h+=Sigma0(a)
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- eor x11,x27,x27,ror#23
- and x17,x20,x27
- bic x28,x21,x27
- add x22,x22,x8 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x11,ror#18 // Sigma1(e)
- ror x11,x23,#28
- add x22,x22,x17 // h+=Ch(e,f,g)
- eor x17,x23,x23,ror#5
- add x22,x22,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x26,x26,x22 // d+=h
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x11,x17,ror#34 // Sigma0(a)
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x22,x22,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x9,x9 // 6
-#endif
- add x22,x22,x17 // h+=Sigma0(a)
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- eor x12,x26,x26,ror#23
- and x17,x27,x26
- bic x19,x20,x26
- add x21,x21,x9 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x12,ror#18 // Sigma1(e)
- ror x12,x22,#28
- add x21,x21,x17 // h+=Ch(e,f,g)
- eor x17,x22,x22,ror#5
- add x21,x21,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x25,x25,x21 // d+=h
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x12,x17,ror#34 // Sigma0(a)
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x21,x21,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x10,x10 // 7
-#endif
- ldp x11,x12,[x1],#2*8
- add x21,x21,x17 // h+=Sigma0(a)
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- eor x13,x25,x25,ror#23
- and x17,x26,x25
- bic x28,x27,x25
- add x20,x20,x10 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x13,ror#18 // Sigma1(e)
- ror x13,x21,#28
- add x20,x20,x17 // h+=Ch(e,f,g)
- eor x17,x21,x21,ror#5
- add x20,x20,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x24,x24,x20 // d+=h
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x13,x17,ror#34 // Sigma0(a)
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x20,x20,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x11,x11 // 8
-#endif
- add x20,x20,x17 // h+=Sigma0(a)
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- eor x14,x24,x24,ror#23
- and x17,x25,x24
- bic x19,x26,x24
- add x27,x27,x11 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x14,ror#18 // Sigma1(e)
- ror x14,x20,#28
- add x27,x27,x17 // h+=Ch(e,f,g)
- eor x17,x20,x20,ror#5
- add x27,x27,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x23,x23,x27 // d+=h
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x14,x17,ror#34 // Sigma0(a)
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x27,x27,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x12,x12 // 9
-#endif
- ldp x13,x14,[x1],#2*8
- add x27,x27,x17 // h+=Sigma0(a)
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- eor x15,x23,x23,ror#23
- and x17,x24,x23
- bic x28,x25,x23
- add x26,x26,x12 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x15,ror#18 // Sigma1(e)
- ror x15,x27,#28
- add x26,x26,x17 // h+=Ch(e,f,g)
- eor x17,x27,x27,ror#5
- add x26,x26,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x22,x22,x26 // d+=h
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x15,x17,ror#34 // Sigma0(a)
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x26,x26,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x13,x13 // 10
-#endif
- add x26,x26,x17 // h+=Sigma0(a)
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- eor x0,x22,x22,ror#23
- and x17,x23,x22
- bic x19,x24,x22
- add x25,x25,x13 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x0,ror#18 // Sigma1(e)
- ror x0,x26,#28
- add x25,x25,x17 // h+=Ch(e,f,g)
- eor x17,x26,x26,ror#5
- add x25,x25,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x21,x21,x25 // d+=h
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x0,x17,ror#34 // Sigma0(a)
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x25,x25,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x14,x14 // 11
-#endif
- ldp x15,x0,[x1],#2*8
- add x25,x25,x17 // h+=Sigma0(a)
- str x6,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- eor x6,x21,x21,ror#23
- and x17,x22,x21
- bic x28,x23,x21
- add x24,x24,x14 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x6,ror#18 // Sigma1(e)
- ror x6,x25,#28
- add x24,x24,x17 // h+=Ch(e,f,g)
- eor x17,x25,x25,ror#5
- add x24,x24,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x20,x20,x24 // d+=h
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x6,x17,ror#34 // Sigma0(a)
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x24,x24,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x15,x15 // 12
-#endif
- add x24,x24,x17 // h+=Sigma0(a)
- str x7,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- eor x7,x20,x20,ror#23
- and x17,x21,x20
- bic x19,x22,x20
- add x23,x23,x15 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x7,ror#18 // Sigma1(e)
- ror x7,x24,#28
- add x23,x23,x17 // h+=Ch(e,f,g)
- eor x17,x24,x24,ror#5
- add x23,x23,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x27,x27,x23 // d+=h
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x7,x17,ror#34 // Sigma0(a)
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x23,x23,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x0,x0 // 13
-#endif
- ldp x1,x2,[x1]
- add x23,x23,x17 // h+=Sigma0(a)
- str x8,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- eor x8,x27,x27,ror#23
- and x17,x20,x27
- bic x28,x21,x27
- add x22,x22,x0 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x8,ror#18 // Sigma1(e)
- ror x8,x23,#28
- add x22,x22,x17 // h+=Ch(e,f,g)
- eor x17,x23,x23,ror#5
- add x22,x22,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x26,x26,x22 // d+=h
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x8,x17,ror#34 // Sigma0(a)
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x22,x22,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x1,x1 // 14
-#endif
- ldr x6,[sp,#24]
- add x22,x22,x17 // h+=Sigma0(a)
- str x9,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- eor x9,x26,x26,ror#23
- and x17,x27,x26
- bic x19,x20,x26
- add x21,x21,x1 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x9,ror#18 // Sigma1(e)
- ror x9,x22,#28
- add x21,x21,x17 // h+=Ch(e,f,g)
- eor x17,x22,x22,ror#5
- add x21,x21,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x25,x25,x21 // d+=h
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x9,x17,ror#34 // Sigma0(a)
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x21,x21,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x2,x2 // 15
-#endif
- ldr x7,[sp,#0]
- add x21,x21,x17 // h+=Sigma0(a)
- str x10,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x9,x4,#1
- and x17,x26,x25
- ror x8,x1,#19
- bic x28,x27,x25
- ror x10,x21,#28
- add x20,x20,x2 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x9,x9,x4,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x10,x10,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x8,x8,x1,ror#61
- eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x10,x21,ror#39 // Sigma0(a)
- eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
- add x3,x3,x12
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x3,x3,x9
- add x20,x20,x17 // h+=Sigma0(a)
- add x3,x3,x8
-.Loop_16_xx:
- ldr x8,[sp,#8]
- str x11,[sp,#0]
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- ror x10,x5,#1
- and x17,x25,x24
- ror x9,x2,#19
- bic x19,x26,x24
- ror x11,x20,#28
- add x27,x27,x3 // h+=X[i]
- eor x16,x16,x24,ror#18
- eor x10,x10,x5,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x24,ror#41 // Sigma1(e)
- eor x11,x11,x20,ror#34
- add x27,x27,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x9,x9,x2,ror#61
- eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
- add x27,x27,x16 // h+=Sigma1(e)
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x11,x20,ror#39 // Sigma0(a)
- eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
- add x4,x4,x13
- add x23,x23,x27 // d+=h
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x4,x4,x10
- add x27,x27,x17 // h+=Sigma0(a)
- add x4,x4,x9
- ldr x9,[sp,#16]
- str x12,[sp,#8]
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- ror x11,x6,#1
- and x17,x24,x23
- ror x10,x3,#19
- bic x28,x25,x23
- ror x12,x27,#28
- add x26,x26,x4 // h+=X[i]
- eor x16,x16,x23,ror#18
- eor x11,x11,x6,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x23,ror#41 // Sigma1(e)
- eor x12,x12,x27,ror#34
- add x26,x26,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x10,x10,x3,ror#61
- eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
- add x26,x26,x16 // h+=Sigma1(e)
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x12,x27,ror#39 // Sigma0(a)
- eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
- add x5,x5,x14
- add x22,x22,x26 // d+=h
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x5,x5,x11
- add x26,x26,x17 // h+=Sigma0(a)
- add x5,x5,x10
- ldr x10,[sp,#24]
- str x13,[sp,#16]
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- ror x12,x7,#1
- and x17,x23,x22
- ror x11,x4,#19
- bic x19,x24,x22
- ror x13,x26,#28
- add x25,x25,x5 // h+=X[i]
- eor x16,x16,x22,ror#18
- eor x12,x12,x7,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x22,ror#41 // Sigma1(e)
- eor x13,x13,x26,ror#34
- add x25,x25,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x11,x11,x4,ror#61
- eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
- add x25,x25,x16 // h+=Sigma1(e)
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x13,x26,ror#39 // Sigma0(a)
- eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
- add x6,x6,x15
- add x21,x21,x25 // d+=h
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x6,x6,x12
- add x25,x25,x17 // h+=Sigma0(a)
- add x6,x6,x11
- ldr x11,[sp,#0]
- str x14,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- ror x13,x8,#1
- and x17,x22,x21
- ror x12,x5,#19
- bic x28,x23,x21
- ror x14,x25,#28
- add x24,x24,x6 // h+=X[i]
- eor x16,x16,x21,ror#18
- eor x13,x13,x8,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x21,ror#41 // Sigma1(e)
- eor x14,x14,x25,ror#34
- add x24,x24,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x12,x12,x5,ror#61
- eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
- add x24,x24,x16 // h+=Sigma1(e)
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x14,x25,ror#39 // Sigma0(a)
- eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
- add x7,x7,x0
- add x20,x20,x24 // d+=h
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x7,x7,x13
- add x24,x24,x17 // h+=Sigma0(a)
- add x7,x7,x12
- ldr x12,[sp,#8]
- str x15,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- ror x14,x9,#1
- and x17,x21,x20
- ror x13,x6,#19
- bic x19,x22,x20
- ror x15,x24,#28
- add x23,x23,x7 // h+=X[i]
- eor x16,x16,x20,ror#18
- eor x14,x14,x9,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x20,ror#41 // Sigma1(e)
- eor x15,x15,x24,ror#34
- add x23,x23,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x13,x13,x6,ror#61
- eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
- add x23,x23,x16 // h+=Sigma1(e)
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x15,x24,ror#39 // Sigma0(a)
- eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
- add x8,x8,x1
- add x27,x27,x23 // d+=h
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x8,x8,x14
- add x23,x23,x17 // h+=Sigma0(a)
- add x8,x8,x13
- ldr x13,[sp,#16]
- str x0,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- ror x15,x10,#1
- and x17,x20,x27
- ror x14,x7,#19
- bic x28,x21,x27
- ror x0,x23,#28
- add x22,x22,x8 // h+=X[i]
- eor x16,x16,x27,ror#18
- eor x15,x15,x10,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x27,ror#41 // Sigma1(e)
- eor x0,x0,x23,ror#34
- add x22,x22,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x14,x14,x7,ror#61
- eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
- add x22,x22,x16 // h+=Sigma1(e)
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x0,x23,ror#39 // Sigma0(a)
- eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
- add x9,x9,x2
- add x26,x26,x22 // d+=h
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x9,x9,x15
- add x22,x22,x17 // h+=Sigma0(a)
- add x9,x9,x14
- ldr x14,[sp,#24]
- str x1,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- ror x0,x11,#1
- and x17,x27,x26
- ror x15,x8,#19
- bic x19,x20,x26
- ror x1,x22,#28
- add x21,x21,x9 // h+=X[i]
- eor x16,x16,x26,ror#18
- eor x0,x0,x11,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x26,ror#41 // Sigma1(e)
- eor x1,x1,x22,ror#34
- add x21,x21,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x15,x15,x8,ror#61
- eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
- add x21,x21,x16 // h+=Sigma1(e)
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x1,x22,ror#39 // Sigma0(a)
- eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
- add x10,x10,x3
- add x25,x25,x21 // d+=h
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x10,x10,x0
- add x21,x21,x17 // h+=Sigma0(a)
- add x10,x10,x15
- ldr x15,[sp,#0]
- str x2,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x1,x12,#1
- and x17,x26,x25
- ror x0,x9,#19
- bic x28,x27,x25
- ror x2,x21,#28
- add x20,x20,x10 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x1,x1,x12,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x2,x2,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x0,x0,x9,ror#61
- eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x2,x21,ror#39 // Sigma0(a)
- eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
- add x11,x11,x4
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x11,x11,x1
- add x20,x20,x17 // h+=Sigma0(a)
- add x11,x11,x0
- ldr x0,[sp,#8]
- str x3,[sp,#0]
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- ror x2,x13,#1
- and x17,x25,x24
- ror x1,x10,#19
- bic x19,x26,x24
- ror x3,x20,#28
- add x27,x27,x11 // h+=X[i]
- eor x16,x16,x24,ror#18
- eor x2,x2,x13,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x24,ror#41 // Sigma1(e)
- eor x3,x3,x20,ror#34
- add x27,x27,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x1,x1,x10,ror#61
- eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
- add x27,x27,x16 // h+=Sigma1(e)
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x3,x20,ror#39 // Sigma0(a)
- eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
- add x12,x12,x5
- add x23,x23,x27 // d+=h
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x12,x12,x2
- add x27,x27,x17 // h+=Sigma0(a)
- add x12,x12,x1
- ldr x1,[sp,#16]
- str x4,[sp,#8]
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- ror x3,x14,#1
- and x17,x24,x23
- ror x2,x11,#19
- bic x28,x25,x23
- ror x4,x27,#28
- add x26,x26,x12 // h+=X[i]
- eor x16,x16,x23,ror#18
- eor x3,x3,x14,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x23,ror#41 // Sigma1(e)
- eor x4,x4,x27,ror#34
- add x26,x26,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x2,x2,x11,ror#61
- eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
- add x26,x26,x16 // h+=Sigma1(e)
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x4,x27,ror#39 // Sigma0(a)
- eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
- add x13,x13,x6
- add x22,x22,x26 // d+=h
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x13,x13,x3
- add x26,x26,x17 // h+=Sigma0(a)
- add x13,x13,x2
- ldr x2,[sp,#24]
- str x5,[sp,#16]
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- ror x4,x15,#1
- and x17,x23,x22
- ror x3,x12,#19
- bic x19,x24,x22
- ror x5,x26,#28
- add x25,x25,x13 // h+=X[i]
- eor x16,x16,x22,ror#18
- eor x4,x4,x15,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x22,ror#41 // Sigma1(e)
- eor x5,x5,x26,ror#34
- add x25,x25,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x3,x3,x12,ror#61
- eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
- add x25,x25,x16 // h+=Sigma1(e)
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x5,x26,ror#39 // Sigma0(a)
- eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
- add x14,x14,x7
- add x21,x21,x25 // d+=h
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x14,x14,x4
- add x25,x25,x17 // h+=Sigma0(a)
- add x14,x14,x3
- ldr x3,[sp,#0]
- str x6,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- ror x5,x0,#1
- and x17,x22,x21
- ror x4,x13,#19
- bic x28,x23,x21
- ror x6,x25,#28
- add x24,x24,x14 // h+=X[i]
- eor x16,x16,x21,ror#18
- eor x5,x5,x0,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x21,ror#41 // Sigma1(e)
- eor x6,x6,x25,ror#34
- add x24,x24,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x4,x4,x13,ror#61
- eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
- add x24,x24,x16 // h+=Sigma1(e)
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x6,x25,ror#39 // Sigma0(a)
- eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
- add x15,x15,x8
- add x20,x20,x24 // d+=h
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x15,x15,x5
- add x24,x24,x17 // h+=Sigma0(a)
- add x15,x15,x4
- ldr x4,[sp,#8]
- str x7,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- ror x6,x1,#1
- and x17,x21,x20
- ror x5,x14,#19
- bic x19,x22,x20
- ror x7,x24,#28
- add x23,x23,x15 // h+=X[i]
- eor x16,x16,x20,ror#18
- eor x6,x6,x1,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x20,ror#41 // Sigma1(e)
- eor x7,x7,x24,ror#34
- add x23,x23,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x5,x5,x14,ror#61
- eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
- add x23,x23,x16 // h+=Sigma1(e)
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x7,x24,ror#39 // Sigma0(a)
- eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
- add x0,x0,x9
- add x27,x27,x23 // d+=h
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x0,x0,x6
- add x23,x23,x17 // h+=Sigma0(a)
- add x0,x0,x5
- ldr x5,[sp,#16]
- str x8,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- ror x7,x2,#1
- and x17,x20,x27
- ror x6,x15,#19
- bic x28,x21,x27
- ror x8,x23,#28
- add x22,x22,x0 // h+=X[i]
- eor x16,x16,x27,ror#18
- eor x7,x7,x2,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x27,ror#41 // Sigma1(e)
- eor x8,x8,x23,ror#34
- add x22,x22,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x6,x6,x15,ror#61
- eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
- add x22,x22,x16 // h+=Sigma1(e)
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x8,x23,ror#39 // Sigma0(a)
- eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
- add x1,x1,x10
- add x26,x26,x22 // d+=h
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x1,x1,x7
- add x22,x22,x17 // h+=Sigma0(a)
- add x1,x1,x6
- ldr x6,[sp,#24]
- str x9,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- ror x8,x3,#1
- and x17,x27,x26
- ror x7,x0,#19
- bic x19,x20,x26
- ror x9,x22,#28
- add x21,x21,x1 // h+=X[i]
- eor x16,x16,x26,ror#18
- eor x8,x8,x3,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x26,ror#41 // Sigma1(e)
- eor x9,x9,x22,ror#34
- add x21,x21,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x7,x7,x0,ror#61
- eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
- add x21,x21,x16 // h+=Sigma1(e)
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x9,x22,ror#39 // Sigma0(a)
- eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
- add x2,x2,x11
- add x25,x25,x21 // d+=h
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x2,x2,x8
- add x21,x21,x17 // h+=Sigma0(a)
- add x2,x2,x7
- ldr x7,[sp,#0]
- str x10,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x9,x4,#1
- and x17,x26,x25
- ror x8,x1,#19
- bic x28,x27,x25
- ror x10,x21,#28
- add x20,x20,x2 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x9,x9,x4,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x10,x10,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x8,x8,x1,ror#61
- eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x10,x21,ror#39 // Sigma0(a)
- eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
- add x3,x3,x12
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x3,x3,x9
- add x20,x20,x17 // h+=Sigma0(a)
- add x3,x3,x8
- cbnz x19,.Loop_16_xx
-
- ldp x0,x2,[x29,#96]
- ldr x1,[x29,#112]
- sub x30,x30,#648 // rewind
-
- ldp x3,x4,[x0]
- ldp x5,x6,[x0,#2*8]
- add x1,x1,#14*8 // advance input pointer
- ldp x7,x8,[x0,#4*8]
- add x20,x20,x3
- ldp x9,x10,[x0,#6*8]
- add x21,x21,x4
- add x22,x22,x5
- add x23,x23,x6
- stp x20,x21,[x0]
- add x24,x24,x7
- add x25,x25,x8
- stp x22,x23,[x0,#2*8]
- add x26,x26,x9
- add x27,x27,x10
- cmp x1,x2
- stp x24,x25,[x0,#4*8]
- stp x26,x27,[x0,#6*8]
- b.ne .Loop
-
- ldp x19,x20,[x29,#16]
- add sp,sp,#4*8
- ldp x21,x22,[x29,#32]
- ldp x23,x24,[x29,#48]
- ldp x25,x26,[x29,#64]
- ldp x27,x28,[x29,#80]
- ldp x29,x30,[sp],#128
- ret
-.size sha512_block_data_order,.-sha512_block_data_order
-
-.align 6
-.type .LK512,%object
-.LK512:
- .quad 0x428a2f98d728ae22,0x7137449123ef65cd
- .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- .quad 0x3956c25bf348b538,0x59f111f1b605d019
- .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- .quad 0xd807aa98a3030242,0x12835b0145706fbe
- .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- .quad 0x9bdc06a725c71235,0xc19bf174cf692694
- .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- .quad 0x983e5152ee66dfab,0xa831c66d2db43210
- .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
- .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
- .quad 0x06ca6351e003826f,0x142929670a0e6e70
- .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
- .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
- .quad 0x81c2c92e47edaee6,0x92722c851482353b
- .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
- .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
- .quad 0xd192e819d6ef5218,0xd69906245565a910
- .quad 0xf40e35855771202a,0x106aa07032bbd1b8
- .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
- .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
- .quad 0x90befffa23631e28,0xa4506cebde82bde9
- .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
- .quad 0xca273eceea26619c,0xd186b8c721c0c207
- .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
- .quad 0x113f9804bef90dae,0x1b710b35131c471b
- .quad 0x28db77f523047d84,0x32caab7b40c72493
- .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
- .quad 0 // terminator
-.size .LK512,.-.LK512
-#ifndef __KERNEL__
-.align 3
-.LOPENSSL_armcap_P:
-# ifdef __ILP32__
- .long OPENSSL_armcap_P-.
-# else
- .quad OPENSSL_armcap_P-.
-# endif
-#endif
-.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-#ifndef __KERNEL__
-.comm OPENSSL_armcap_P,4,4
-#endif
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 8a078fc662ac..7e157ab6cd50 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -3,12 +3,10 @@
#define __ASM_ALTERNATIVE_MACROS_H
#include <asm/cpucaps.h>
+#include <asm/insn-def.h>
#define ARM64_CB_PATCH ARM64_NCAPS
-/* A64 instructions are always 32 bits. */
-#define AARCH64_INSN_SIZE 4
-
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
@@ -197,11 +195,6 @@ alternative_endif
#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-.macro user_alt, label, oldinstr, newinstr, cond
-9999: alternative_insn "\oldinstr", "\newinstr", \cond
- _asm_extable 9999b, \label
-.endm
-
#endif /* __ASSEMBLY__ */
/*
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 934b9be582d2..4ad22c3135db 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -124,7 +124,8 @@ static inline u32 gic_read_rpr(void)
#define gic_read_lpir(c) readq_relaxed(c)
#define gic_write_lpir(v, c) writeq_relaxed(v, c)
-#define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+#define gic_flush_dcache_to_poc(a,l) \
+ dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
#define gits_read_baser(c) readq_relaxed(c)
#define gits_write_baser(v, c) writeq_relaxed(v, c)
diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h
index 1c9a3a0c5fa5..ec1d9655f885 100644
--- a/arch/arm64/include/asm/asm-prototypes.h
+++ b/arch/arm64/include/asm/asm-prototypes.h
@@ -23,4 +23,10 @@ long long __ashlti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __lshrti3(long long a, int b);
+/*
+ * This function uses a custom calling convention and cannot be called from C so
+ * this prototype is not entirely accurate.
+ */
+void __hwasan_tag_mismatch(unsigned long addr, unsigned long access_info);
+
#endif /* __ASM_PROTOTYPES_H */
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index 8ca2dc0661ee..f1bba5fc61c4 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -7,19 +7,7 @@
#include <asm/cpufeature.h>
#include <asm/sysreg.h>
-#ifdef CONFIG_ARM64_PTR_AUTH
-/*
- * thread.keys_user.ap* as offset exceeds the #imm offset range
- * so use the base value of ldp as thread.keys_user and offset as
- * thread.keys_user.ap*.
- */
- .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
- mov \tmp1, #THREAD_KEYS_USER
- add \tmp1, \tsk, \tmp1
- ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
- msr_s SYS_APIAKEYLO_EL1, \tmp2
- msr_s SYS_APIAKEYHI_EL1, \tmp3
- .endm
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
.macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
mov \tmp1, #THREAD_KEYS_KERNEL
@@ -42,6 +30,33 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
alternative_else_nop_endif
.endm
+#else /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+ .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
+ .endm
+
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+/*
+ * thread.keys_user.ap* as offset exceeds the #imm offset range
+ * so use the base value of ldp as thread.keys_user and offset as
+ * thread.keys_user.ap*.
+ */
+ .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+ mov \tmp1, #THREAD_KEYS_USER
+ add \tmp1, \tsk, \tmp1
+ ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
+ msr_s SYS_APIAKEYLO_EL1, \tmp2
+ msr_s SYS_APIAKEYHI_EL1, \tmp3
+ .endm
+
.macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
mrs \tmp1, id_aa64isar1_el1
ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8
@@ -64,17 +79,11 @@ alternative_else_nop_endif
.Lno_addr_auth\@:
.endm
-#else /* CONFIG_ARM64_PTR_AUTH */
+#else /* !CONFIG_ARM64_PTR_AUTH */
.macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
.endm
- .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
- .endm
-
- .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
- .endm
-
#endif /* CONFIG_ARM64_PTR_AUTH */
#endif /* __ASM_ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1bd8f04..89faca0e740d 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -130,15 +130,27 @@ alternative_endif
.endm
/*
- * Emit an entry into the exception table
+ * Create an exception table entry for `insn`, which will branch to `fixup`
+ * when an unhandled fault is taken.
*/
- .macro _asm_extable, from, to
+ .macro _asm_extable, insn, fixup
.pushsection __ex_table, "a"
.align 3
- .long (\from - .), (\to - .)
+ .long (\insn - .), (\fixup - .)
.popsection
.endm
+/*
+ * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
+ * do nothing.
+ */
+ .macro _cond_extable, insn, fixup
+ .ifnc \fixup,
+ _asm_extable \insn, \fixup
+ .endif
+ .endm
+
+
#define USER(l, x...) \
9999: x; \
_asm_extable 9999b, l
@@ -232,17 +244,25 @@ lr .req x30 // link register
* @dst: destination register
*/
#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
- .macro this_cpu_offset, dst
+ .macro get_this_cpu_offset, dst
mrs \dst, tpidr_el2
.endm
#else
- .macro this_cpu_offset, dst
+ .macro get_this_cpu_offset, dst
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \dst, tpidr_el1
alternative_else
mrs \dst, tpidr_el2
alternative_endif
.endm
+
+ .macro set_this_cpu_offset, src
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+ msr tpidr_el1, \src
+alternative_else
+ msr tpidr_el2, \src
+alternative_endif
+ .endm
#endif
/*
@@ -253,7 +273,7 @@ alternative_endif
.macro adr_this_cpu, dst, sym, tmp
adrp \tmp, \sym
add \dst, \tmp, #:lo12:\sym
- this_cpu_offset \tmp
+ get_this_cpu_offset \tmp
add \dst, \dst, \tmp
.endm
@@ -264,7 +284,7 @@ alternative_endif
*/
.macro ldr_this_cpu dst, sym, tmp
adr_l \dst, \sym
- this_cpu_offset \tmp
+ get_this_cpu_offset \tmp
ldr \dst, [\dst, \tmp]
.endm
@@ -375,51 +395,53 @@ alternative_cb_end
bfi \tcr, \tmp0, \pos, #3
.endm
+ .macro __dcache_op_workaround_clean_cache, op, addr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ dc \op, \addr
+alternative_else
+ dc civac, \addr
+alternative_endif
+ .endm
+
/*
* Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
+ * [start, end)
*
* op: operation passed to dc instruction
* domain: domain used in dsb instruciton
- * kaddr: starting virtual address of the region
- * size: size of the region
- * Corrupts: kaddr, size, tmp1, tmp2
+ * start: starting virtual address of the region
+ * end: end virtual address of the region
+ * fixup: optional label to branch to on user fault
+ * Corrupts: start, end, tmp1, tmp2
*/
- .macro __dcache_op_workaround_clean_cache, op, kaddr
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
- dc \op, \kaddr
-alternative_else
- dc civac, \kaddr
-alternative_endif
- .endm
-
- .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+ .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
dcache_line_size \tmp1, \tmp2
- add \size, \kaddr, \size
sub \tmp2, \tmp1, #1
- bic \kaddr, \kaddr, \tmp2
-9998:
+ bic \start, \start, \tmp2
+.Ldcache_op\@:
.ifc \op, cvau
- __dcache_op_workaround_clean_cache \op, \kaddr
+ __dcache_op_workaround_clean_cache \op, \start
.else
.ifc \op, cvac
- __dcache_op_workaround_clean_cache \op, \kaddr
+ __dcache_op_workaround_clean_cache \op, \start
.else
.ifc \op, cvap
- sys 3, c7, c12, 1, \kaddr // dc cvap
+ sys 3, c7, c12, 1, \start // dc cvap
.else
.ifc \op, cvadp
- sys 3, c7, c13, 1, \kaddr // dc cvadp
+ sys 3, c7, c13, 1, \start // dc cvadp
.else
- dc \op, \kaddr
+ dc \op, \start
.endif
.endif
.endif
.endif
- add \kaddr, \kaddr, \tmp1
- cmp \kaddr, \size
- b.lo 9998b
+ add \start, \start, \tmp1
+ cmp \start, \end
+ b.lo .Ldcache_op\@
dsb \domain
+
+ _cond_extable .Ldcache_op\@, \fixup
.endm
/*
@@ -427,20 +449,22 @@ alternative_endif
* [start, end)
*
* start, end: virtual addresses describing the region
- * label: A label to branch to on user fault.
+ * fixup: optional label to branch to on user fault
* Corrupts: tmp1, tmp2
*/
- .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+ .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
icache_line_size \tmp1, \tmp2
sub \tmp2, \tmp1, #1
bic \tmp2, \start, \tmp2
-9997:
-USER(\label, ic ivau, \tmp2) // invalidate I line PoU
+.Licache_op\@:
+ ic ivau, \tmp2 // invalidate I line PoU
add \tmp2, \tmp2, \tmp1
cmp \tmp2, \end
- b.lo 9997b
+ b.lo .Licache_op\@
dsb ish
isb
+
+ _cond_extable .Licache_op\@, \fixup
.endm
/*
@@ -745,7 +769,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
cbz \tmp, \lbl
#endif
adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
- this_cpu_offset \tmp2
+ get_this_cpu_offset \tmp2
ldr w\tmp, [\tmp, \tmp2]
cbnz w\tmp, \lbl // yield on pending softirq in task context
.Lnoyield_\@:
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index b56a4b2bc248..c9979273d389 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -223,6 +223,4 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-#define ARCH_ATOMIC
-
#endif /* __ASM_ATOMIC_H */
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index a074459f8f2f..a9c0716e7440 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -47,7 +47,7 @@
* cache before the transfer is done, causing old data to be seen by
* the CPU.
*/
-#define ARCH_DMA_MINALIGN (128)
+#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
#ifdef CONFIG_KASAN_SW_TAGS
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 52e5c1623224..543c997eb3b7 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -30,45 +30,58 @@
* the implementation assumes non-aliasing VIPT D-cache and (aliasing)
* VIPT I-cache.
*
- * flush_icache_range(start, end)
+ * All functions below apply to the interval [start, end)
+ * - start - virtual start address (inclusive)
+ * - end - virtual end address (exclusive)
*
- * Ensure coherency between the I-cache and the D-cache in the
- * region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * caches_clean_inval_pou(start, end)
*
- * invalidate_icache_range(start, end)
+ * Ensure coherency between the I-cache and the D-cache region to
+ * the Point of Unification.
*
- * Invalidate the I-cache in the region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * caches_clean_inval_user_pou(start, end)
*
- * __flush_cache_user_range(start, end)
+ * Ensure coherency between the I-cache and the D-cache region to
+ * the Point of Unification.
+ * Use only if the region might access user memory.
*
- * Ensure coherency between the I-cache and the D-cache in the
- * region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * icache_inval_pou(start, end)
*
- * __flush_dcache_area(kaddr, size)
+ * Invalidate I-cache region to the Point of Unification.
*
- * Ensure that the data held in page is written back.
- * - kaddr - page address
- * - size - region size
+ * dcache_clean_inval_poc(start, end)
+ *
+ * Clean and invalidate D-cache region to the Point of Coherency.
+ *
+ * dcache_inval_poc(start, end)
+ *
+ * Invalidate D-cache region to the Point of Coherency.
+ *
+ * dcache_clean_poc(start, end)
+ *
+ * Clean D-cache region to the Point of Coherency.
+ *
+ * dcache_clean_pop(start, end)
+ *
+ * Clean D-cache region to the Point of Persistence.
+ *
+ * dcache_clean_pou(start, end)
+ *
+ * Clean D-cache region to the Point of Unification.
*/
-extern void __flush_icache_range(unsigned long start, unsigned long end);
-extern int invalidate_icache_range(unsigned long start, unsigned long end);
-extern void __flush_dcache_area(void *addr, size_t len);
-extern void __inval_dcache_area(void *addr, size_t len);
-extern void __clean_dcache_area_poc(void *addr, size_t len);
-extern void __clean_dcache_area_pop(void *addr, size_t len);
-extern void __clean_dcache_area_pou(void *addr, size_t len);
-extern long __flush_cache_user_range(unsigned long start, unsigned long end);
-extern void sync_icache_aliases(void *kaddr, unsigned long len);
+extern void caches_clean_inval_pou(unsigned long start, unsigned long end);
+extern void icache_inval_pou(unsigned long start, unsigned long end);
+extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_pop(unsigned long start, unsigned long end);
+extern void dcache_clean_pou(unsigned long start, unsigned long end);
+extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
+extern void sync_icache_aliases(unsigned long start, unsigned long end);
static inline void flush_icache_range(unsigned long start, unsigned long end)
{
- __flush_icache_range(start, end);
+ caches_clean_inval_pou(start, end);
/*
* IPI all online CPUs so that they undergo a context synchronization
@@ -122,7 +135,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
-static __always_inline void __flush_icache_all(void)
+static __always_inline void icache_inval_all_pou(void)
{
if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
return;
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
index 6fb2e6bcc392..dc3ea4080e2e 100644
--- a/arch/arm64/include/asm/compiler.h
+++ b/arch/arm64/include/asm/compiler.h
@@ -23,4 +23,20 @@
#define __builtin_return_address(val) \
(void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val)))
+#ifdef CONFIG_CFI_CLANG
+/*
+ * With CONFIG_CFI_CLANG, the compiler replaces function address
+ * references with the address of the function's CFI jump table
+ * entry. The function_nocfi macro always returns the address of the
+ * actual function instead.
+ */
+#define function_nocfi(x) ({ \
+ void *addr; \
+ asm("adrp %0, " __stringify(x) "\n\t" \
+ "add %0, %0, :lo12:" __stringify(x) \
+ : "=r" (addr)); \
+ addr; \
+})
+#endif
+
#endif /* __ASM_COMPILER_H */
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 7faae6ff3ab4..0f6d16faa540 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -12,26 +12,7 @@
/*
* Records attributes of an individual CPU.
*/
-struct cpuinfo_arm64 {
- struct cpu cpu;
- struct kobject kobj;
- u32 reg_ctr;
- u32 reg_cntfrq;
- u32 reg_dczid;
- u32 reg_midr;
- u32 reg_revidr;
-
- u64 reg_id_aa64dfr0;
- u64 reg_id_aa64dfr1;
- u64 reg_id_aa64isar0;
- u64 reg_id_aa64isar1;
- u64 reg_id_aa64mmfr0;
- u64 reg_id_aa64mmfr1;
- u64 reg_id_aa64mmfr2;
- u64 reg_id_aa64pfr0;
- u64 reg_id_aa64pfr1;
- u64 reg_id_aa64zfr0;
-
+struct cpuinfo_32bit {
u32 reg_id_dfr0;
u32 reg_id_dfr1;
u32 reg_id_isar0;
@@ -54,6 +35,30 @@ struct cpuinfo_arm64 {
u32 reg_mvfr0;
u32 reg_mvfr1;
u32 reg_mvfr2;
+};
+
+struct cpuinfo_arm64 {
+ struct cpu cpu;
+ struct kobject kobj;
+ u64 reg_ctr;
+ u64 reg_cntfrq;
+ u64 reg_dczid;
+ u64 reg_midr;
+ u64 reg_revidr;
+ u64 reg_gmid;
+
+ u64 reg_id_aa64dfr0;
+ u64 reg_id_aa64dfr1;
+ u64 reg_id_aa64isar0;
+ u64 reg_id_aa64isar1;
+ u64 reg_id_aa64mmfr0;
+ u64 reg_id_aa64mmfr1;
+ u64 reg_id_aa64mmfr2;
+ u64 reg_id_aa64pfr0;
+ u64 reg_id_aa64pfr1;
+ u64 reg_id_aa64zfr0;
+
+ struct cpuinfo_32bit aarch32;
/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
u64 reg_zcr;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 338840c00e8e..9bb9d11750d7 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -619,6 +619,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
return val > 0;
}
+static inline bool id_aa64pfr1_mte(u64 pfr1)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
+
+ return val >= ID_AA64PFR1_MTE;
+}
+
void __init setup_cpu_features(void);
void check_local_cpu_capabilities(void);
@@ -630,9 +637,15 @@ static inline bool cpu_supports_mixed_endian_el0(void)
return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
}
+const struct cpumask *system_32bit_el0_cpumask(void);
+DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
static inline bool system_supports_32bit_el0(void)
{
- return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
+ u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ return static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
+ id_aa64pfr0_32bit_el0(pfr0);
}
static inline bool system_supports_4kb_granule(void)
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
index 3c5ddb429ea2..14a19d1141bd 100644
--- a/arch/arm64/include/asm/cpuidle.h
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -18,4 +18,39 @@ static inline int arm_cpuidle_suspend(int index)
return -EOPNOTSUPP;
}
#endif
+
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#include <asm/arch_gicv3.h>
+
+struct arm_cpuidle_irq_context {
+ unsigned long pmr;
+ unsigned long daif_bits;
+};
+
+#define arm_cpuidle_save_irq_context(__c) \
+ do { \
+ struct arm_cpuidle_irq_context *c = __c; \
+ if (system_uses_irq_prio_masking()) { \
+ c->daif_bits = read_sysreg(daif); \
+ write_sysreg(c->daif_bits | PSR_I_BIT | PSR_F_BIT, \
+ daif); \
+ c->pmr = gic_read_pmr(); \
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); \
+ } \
+ } while (0)
+
+#define arm_cpuidle_restore_irq_context(__c) \
+ do { \
+ struct arm_cpuidle_irq_context *c = __c; \
+ if (system_uses_irq_prio_masking()) { \
+ gic_write_pmr(c->pmr); \
+ write_sysreg(c->daif_bits, daif); \
+ } \
+ } while (0)
+#else
+struct arm_cpuidle_irq_context { };
+
+#define arm_cpuidle_save_irq_context(c) (void)c
+#define arm_cpuidle_restore_irq_context(c) (void)c
+#endif
#endif
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 3578aba9c608..1bed37eb013a 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -137,7 +137,7 @@ void efi_virtmap_unload(void);
static inline void efi_capsule_flush_cache_range(void *addr, int size)
{
- __flush_dcache_area(addr, size);
+ dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
}
#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 6546158d2f2d..4afbc45b8bb0 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -31,20 +31,35 @@ static inline u32 disr_to_esr(u64 disr)
return esr;
}
-asmlinkage void el1_sync_handler(struct pt_regs *regs);
-asmlinkage void el0_sync_handler(struct pt_regs *regs);
-asmlinkage void el0_sync_compat_handler(struct pt_regs *regs);
+asmlinkage void handle_bad_stack(struct pt_regs *regs);
-asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
-asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
+asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el1h_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_32_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_error_handler(struct pt_regs *regs);
+
+asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+ void (*func)(struct pt_regs *));
asmlinkage void enter_from_user_mode(void);
asmlinkage void exit_to_user_mode(void);
-void arm64_enter_nmi(struct pt_regs *regs);
-void arm64_exit_nmi(struct pt_regs *regs);
void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
void do_undefinstr(struct pt_regs *regs);
void do_bti(struct pt_regs *regs);
-asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
struct pt_regs *regs);
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
@@ -57,4 +72,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs);
void do_el0_svc(struct pt_regs *regs);
void do_el0_svc_compat(struct pt_regs *regs);
void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
+void do_serror(struct pt_regs *regs, unsigned int esr);
+
+void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 2599504674b5..c072161d5c65 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread)
extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
unsigned long vq_minus_1);
-extern void sve_flush_live(void);
+extern void sve_flush_live(unsigned long vq_minus_1);
extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index a2563992d2dc..059204477ce6 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -213,8 +213,10 @@
mov v\nz\().16b, v\nz\().16b
.endm
-.macro sve_flush
+.macro sve_flush_z
_for n, 0, 31, _sve_flush_z \n
+.endm
+.macro sve_flush_p_ffr
_for n, 0, 15, _sve_pfalse \n
_sve_wrffr 0
.endm
diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h
new file mode 100644
index 000000000000..2c075f615c6a
--- /dev/null
+++ b/arch/arm64/include/asm/insn-def.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_INSN_DEF_H
+#define __ASM_INSN_DEF_H
+
+/* A64 instructions are always 32 bits. */
+#define AARCH64_INSN_SIZE 4
+
+#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4ebb9c054ccc..6b776c8667b2 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -10,7 +10,7 @@
#include <linux/build_bug.h>
#include <linux/types.h>
-#include <asm/alternative.h>
+#include <asm/insn-def.h>
#ifndef __ASSEMBLY__
/*
@@ -30,6 +30,7 @@
*/
enum aarch64_insn_encoding_class {
AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */
+ AARCH64_INSN_CLS_SVE, /* SVE instructions */
AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */
AARCH64_INSN_CLS_DP_REG, /* Data processing - register */
AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */
@@ -294,6 +295,12 @@ __AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000)
__AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
__AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000)
__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
+__AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000)
+__AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000)
+__AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00)
+__AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00)
+__AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
+__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
@@ -302,6 +309,8 @@ __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
+__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
__AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000)
__AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000)
__AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000)
@@ -334,6 +343,7 @@ __AARCH64_INSN_FUNCS(rev64, 0x7FFFFC00, 0x5AC00C00)
__AARCH64_INSN_FUNCS(and, 0x7F200000, 0x0A000000)
__AARCH64_INSN_FUNCS(bic, 0x7F200000, 0x0A200000)
__AARCH64_INSN_FUNCS(orr, 0x7F200000, 0x2A000000)
+__AARCH64_INSN_FUNCS(mov_reg, 0x7FE0FFE0, 0x2A0003E0)
__AARCH64_INSN_FUNCS(orn, 0x7F200000, 0x2A200000)
__AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000)
__AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000)
@@ -368,6 +378,14 @@ __AARCH64_INSN_FUNCS(eret_auth, 0xFFFFFBFF, 0xD69F0BFF)
__AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000)
__AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F)
__AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000)
+__AARCH64_INSN_FUNCS(dmb, 0xFFFFF0FF, 0xD50330BF)
+__AARCH64_INSN_FUNCS(dsb_base, 0xFFFFF0FF, 0xD503309F)
+__AARCH64_INSN_FUNCS(dsb_nxs, 0xFFFFF3FF, 0xD503323F)
+__AARCH64_INSN_FUNCS(isb, 0xFFFFF0FF, 0xD50330DF)
+__AARCH64_INSN_FUNCS(sb, 0xFFFFFFFF, 0xD50330FF)
+__AARCH64_INSN_FUNCS(clrex, 0xFFFFF0FF, 0xD503305F)
+__AARCH64_INSN_FUNCS(ssbb, 0xFFFFFFFF, 0xD503309F)
+__AARCH64_INSN_FUNCS(pssbb, 0xFFFFFFFF, 0xD503349F)
#undef __AARCH64_INSN_FUNCS
@@ -379,8 +397,47 @@ static inline bool aarch64_insn_is_adr_adrp(u32 insn)
return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn);
}
-int aarch64_insn_read(void *addr, u32 *insnp);
-int aarch64_insn_write(void *addr, u32 insn);
+static inline bool aarch64_insn_is_dsb(u32 insn)
+{
+ return aarch64_insn_is_dsb_base(insn) || aarch64_insn_is_dsb_nxs(insn);
+}
+
+static inline bool aarch64_insn_is_barrier(u32 insn)
+{
+ return aarch64_insn_is_dmb(insn) || aarch64_insn_is_dsb(insn) ||
+ aarch64_insn_is_isb(insn) || aarch64_insn_is_sb(insn) ||
+ aarch64_insn_is_clrex(insn) || aarch64_insn_is_ssbb(insn) ||
+ aarch64_insn_is_pssbb(insn);
+}
+
+static inline bool aarch64_insn_is_store_single(u32 insn)
+{
+ return aarch64_insn_is_store_imm(insn) ||
+ aarch64_insn_is_store_pre(insn) ||
+ aarch64_insn_is_store_post(insn);
+}
+
+static inline bool aarch64_insn_is_store_pair(u32 insn)
+{
+ return aarch64_insn_is_stp(insn) ||
+ aarch64_insn_is_stp_pre(insn) ||
+ aarch64_insn_is_stp_post(insn);
+}
+
+static inline bool aarch64_insn_is_load_single(u32 insn)
+{
+ return aarch64_insn_is_load_imm(insn) ||
+ aarch64_insn_is_load_pre(insn) ||
+ aarch64_insn_is_load_post(insn);
+}
+
+static inline bool aarch64_insn_is_load_pair(u32 insn)
+{
+ return aarch64_insn_is_ldp(insn) ||
+ aarch64_insn_is_ldp_pre(insn) ||
+ aarch64_insn_is_ldp_post(insn);
+}
+
enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
bool aarch64_insn_uses_literal(u32 insn);
bool aarch64_insn_is_branch(u32 insn);
@@ -487,9 +544,6 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
s32 aarch64_get_branch_offset(u32 insn);
u32 aarch64_set_branch_offset(u32 insn, s32 offset);
-int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
-int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
-
s32 aarch64_insn_adrp_get_offset(u32 insn);
u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
@@ -506,6 +560,7 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn);
typedef bool (pstate_check_t)(unsigned long);
extern pstate_check_t * const aarch32_opcode_cond_checks[16];
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index d44df9d62fc9..3512184cfec1 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -18,9 +18,9 @@
* 64K (section size = 512M).
*/
#ifdef CONFIG_ARM64_4K_PAGES
-#define ARM64_SWAPPER_USES_SECTION_MAPS 1
+#define ARM64_KERNEL_USES_PMD_MAPS 1
#else
-#define ARM64_SWAPPER_USES_SECTION_MAPS 0
+#define ARM64_KERNEL_USES_PMD_MAPS 0
#endif
/*
@@ -33,7 +33,7 @@
* VA range, so pages required to map highest possible PA are reserved in all
* cases.
*/
-#if ARM64_SWAPPER_USES_SECTION_MAPS
+#if ARM64_KERNEL_USES_PMD_MAPS
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
#else
@@ -90,9 +90,9 @@
#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
/* Initial memory map size */
-#if ARM64_SWAPPER_USES_SECTION_MAPS
-#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT
-#define SWAPPER_BLOCK_SIZE SECTION_SIZE
+#if ARM64_KERNEL_USES_PMD_MAPS
+#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
+#define SWAPPER_BLOCK_SIZE PMD_SIZE
#define SWAPPER_TABLE_SHIFT PUD_SHIFT
#else
#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
@@ -100,16 +100,13 @@
#define SWAPPER_TABLE_SHIFT PMD_SHIFT
#endif
-/* The size of the initial kernel direct mapping */
-#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT)
-
/*
* Initial memory map attributes.
*/
#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#if ARM64_SWAPPER_USES_SECTION_MAPS
+#if ARM64_KERNEL_USES_PMD_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
#else
#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
@@ -125,7 +122,7 @@
#if defined(CONFIG_ARM64_4K_PAGES)
#define ARM64_MEMSTART_SHIFT PUD_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
-#define ARM64_MEMSTART_SHIFT (PMD_SHIFT + 5)
+#define ARM64_MEMSTART_SHIFT CONT_PMD_SHIFT
#else
#define ARM64_MEMSTART_SHIFT PMD_SHIFT
#endif
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 692c9049befa..d436831dd706 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,7 +12,8 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
-#define HCR_ATA (UL(1) << 56)
+#define HCR_ATA_SHIFT 56
+#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
#define HCR_FWB (UL(1) << 46)
#define HCR_API (UL(1) << 41)
#define HCR_APK (UL(1) << 40)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 5e9b33cbac51..9f0bf2109be7 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -8,6 +8,7 @@
#define __ARM_KVM_ASM_H__
#include <asm/hyp_image.h>
+#include <asm/insn.h>
#include <asm/virt.h>
#define ARM_EXIT_WITH_SERROR_BIT 31
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 01b9857757f2..fd418955e31e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 |= HCR_TID2;
+
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
}
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7cd7d5c8c4bc..41911585ae0c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -46,6 +46,7 @@
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -132,6 +133,9 @@ struct kvm_arch {
u8 pfr0_csv2;
u8 pfr0_csv3;
+
+ /* Memory Tagging Extension enabled for the guest */
+ bool mte_enabled;
};
struct kvm_vcpu_fault_info {
@@ -206,6 +210,12 @@ enum vcpu_sysreg {
CNTP_CVAL_EL0,
CNTP_CTL_EL0,
+ /* Memory Tagging Extension registers */
+ RGSR_EL1, /* Random Allocation Tag Seed Register */
+ GCR_EL1, /* Tag Control Register */
+ TFSR_EL1, /* Tag Fault Status Register (EL1) */
+ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -556,16 +566,11 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
}
struct kvm_vm_stat {
- ulong remote_tlb_flush;
+ struct kvm_vm_stat_generic generic;
};
struct kvm_vcpu_stat {
- u64 halt_successful_poll;
- u64 halt_attempted_poll;
- u64 halt_poll_success_ns;
- u64 halt_poll_fail_ns;
- u64 halt_poll_invalid;
- u64 halt_wakeup;
+ struct kvm_vcpu_stat_generic generic;
u64 hvc_exit_stat;
u64 wfe_exit_stat;
u64 wfi_exit_stat;
@@ -721,6 +726,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags);
+
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
@@ -769,6 +777,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 25ed956f9af1..b52c5c4b9a3d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -180,17 +180,16 @@ static inline void *__kvm_vector_slot2addr(void *base,
struct kvm;
-#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+#define kvm_flush_dcache_to_poc(a,l) \
+ dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+static inline void __clean_dcache_guest_page(void *va, size_t size)
{
- void *va = page_address(pfn_to_page(pfn));
-
/*
* With FWB, we ensure that the guest always accesses memory using
* cacheable attributes, and we don't have to clean to PoC when
@@ -203,18 +202,14 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
kvm_flush_dcache_to_poc(va, size);
}
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
- unsigned long size)
+static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
if (icache_is_aliasing()) {
/* any kind of VIPT cache */
- __flush_icache_all();
+ icache_inval_all_pou();
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
- void *va = page_address(pfn_to_page(pfn));
-
- invalidate_icache_range((unsigned long)va,
- (unsigned long)va + size);
+ icache_inval_pou((unsigned long)va, (unsigned long)va + size);
}
}
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 000000000000..de002636eb1f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\g_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\h_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+ isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index c3674c47d48c..f004c0115d89 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -27,23 +27,29 @@ typedef u64 kvm_pte_t;
/**
* struct kvm_pgtable_mm_ops - Memory management callbacks.
- * @zalloc_page: Allocate a single zeroed memory page. The @arg parameter
- * can be used by the walker to pass a memcache. The
- * initial refcount of the page is 1.
- * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The
- * @size parameter is in bytes, and is rounded-up to the
- * next page boundary. The resulting allocation is
- * physically contiguous.
- * @free_pages_exact: Free an exact number of memory pages previously
- * allocated by zalloc_pages_exact.
- * @get_page: Increment the refcount on a page.
- * @put_page: Decrement the refcount on a page. When the refcount
- * reaches 0 the page is automatically freed.
- * @page_count: Return the refcount of a page.
- * @phys_to_virt: Convert a physical address into a virtual address mapped
- * in the current context.
- * @virt_to_phys: Convert a virtual address mapped in the current context
- * into a physical address.
+ * @zalloc_page: Allocate a single zeroed memory page.
+ * The @arg parameter can be used by the walker
+ * to pass a memcache. The initial refcount of
+ * the page is 1.
+ * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages.
+ * The @size parameter is in bytes, and is rounded
+ * up to the next page boundary. The resulting
+ * allocation is physically contiguous.
+ * @free_pages_exact: Free an exact number of memory pages previously
+ * allocated by zalloc_pages_exact.
+ * @get_page: Increment the refcount on a page.
+ * @put_page: Decrement the refcount on a page. When the
+ * refcount reaches 0 the page is automatically
+ * freed.
+ * @page_count: Return the refcount of a page.
+ * @phys_to_virt: Convert a physical address into a virtual
+ * address mapped in the current context.
+ * @virt_to_phys: Convert a virtual address mapped in the current
+ * context into a physical address.
+ * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC
+ * for the specified memory address range.
+ * @icache_inval_pou: Invalidate the instruction cache to the PoU
+ * for the specified memory address range.
*/
struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
@@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops {
int (*page_count)(void *addr);
void* (*phys_to_virt)(phys_addr_t phys);
phys_addr_t (*virt_to_phys)(void *addr);
+ void (*dcache_clean_inval_poc)(void *addr, size_t size);
+ void (*icache_inval_pou)(void *addr, size_t size);
};
/**
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index ba89a9af820a..9906541a6861 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -56,8 +56,16 @@
SYM_FUNC_START_ALIAS(__pi_##x); \
SYM_FUNC_START_WEAK(x)
+#define SYM_FUNC_START_WEAK_ALIAS_PI(x) \
+ SYM_FUNC_START_ALIAS(__pi_##x); \
+ SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN)
+
#define SYM_FUNC_END_PI(x) \
SYM_FUNC_END(x); \
SYM_FUNC_END_ALIAS(__pi_##x)
+#define SYM_FUNC_END_ALIAS_PI(x) \
+ SYM_FUNC_END_ALIAS(x); \
+ SYM_FUNC_END_ALIAS(__pi_##x)
+
#endif
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 9027b7e16c4c..824a3655dd93 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -135,10 +135,8 @@
#define MT_NORMAL 0
#define MT_NORMAL_TAGGED 1
#define MT_NORMAL_NC 2
-#define MT_NORMAL_WT 3
-#define MT_DEVICE_nGnRnE 4
-#define MT_DEVICE_nGnRE 5
-#define MT_DEVICE_GRE 6
+#define MT_DEVICE_nGnRnE 3
+#define MT_DEVICE_nGnRE 4
/*
* Memory types for Stage-2 translation
@@ -323,22 +321,6 @@ static inline void *phys_to_virt(phys_addr_t x)
#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x)))
#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
-#ifdef CONFIG_CFI_CLANG
-/*
- * With CONFIG_CFI_CLANG, the compiler replaces function address
- * references with the address of the function's CFI jump table
- * entry. The function_nocfi macro always returns the address of the
- * actual function instead.
- */
-#define function_nocfi(x) ({ \
- void *addr; \
- asm("adrp %0, " __stringify(x) "\n\t" \
- "add %0, %0, :lo12:" __stringify(x) \
- : "=r" (addr)); \
- addr; \
-})
-#endif
-
/*
* virt_to_page(x) convert a _valid_ virtual address to struct page *
* virt_addr_valid(x) indicates whether a virtual address is valid
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index d3cef9133539..eeb210997149 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -177,9 +177,9 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
return;
if (mm == &init_mm)
- ttbr = __pa_symbol(reserved_pg_dir);
+ ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
else
- ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+ ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48;
WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
}
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index 810045628c66..a11ccadd47d2 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,7 +1,20 @@
-#ifdef CONFIG_ARM64_MODULE_PLTS
SECTIONS {
+#ifdef CONFIG_ARM64_MODULE_PLTS
.plt 0 (NOLOAD) : { BYTE(0) }
.init.plt 0 (NOLOAD) : { BYTE(0) }
.text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
-}
#endif
+
+#ifdef CONFIG_KASAN_SW_TAGS
+ /*
+ * Outlined checks go into comdat-deduplicated sections named .text.hot.
+ * Because they are in comdats they are not combined by the linker and
+ * we otherwise end up with multiple sections with the same .text.hot
+ * name in the .ko file. The kernel module loader warns if it sees
+ * multiple sections with the same name so we use this sections
+ * directive to force them into a single section and silence the
+ * warning.
+ */
+ .text.hot : { *(.text.hot) }
+#endif
+}
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index cf241b0f0a42..626d359b396e 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -7,6 +7,7 @@
#define MTE_GRANULE_SIZE UL(16)
#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE)
#define MTE_TAG_SHIFT 56
#define MTE_TAG_SIZE 4
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index ddd4d17cf9a0..d952352bd008 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -48,43 +48,84 @@ static inline u8 mte_get_random_tag(void)
return mte_get_ptr_tag(addr);
}
+static inline u64 __stg_post(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "stg %0, [%0], #16"
+ : "+r"(p)
+ :
+ : "memory");
+ return p;
+}
+
+static inline u64 __stzg_post(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "stzg %0, [%0], #16"
+ : "+r"(p)
+ :
+ : "memory");
+ return p;
+}
+
+static inline void __dc_gva(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "dc gva, %0" : : "r"(p) : "memory");
+}
+
+static inline void __dc_gzva(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "dc gzva, %0" : : "r"(p) : "memory");
+}
+
/*
* Assign allocation tags for a region of memory based on the pointer tag.
* Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
- * size must be non-zero and MTE_GRANULE_SIZE aligned.
+ * size must be MTE_GRANULE_SIZE aligned.
*/
-static inline void mte_set_mem_tag_range(void *addr, size_t size,
- u8 tag, bool init)
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
+ bool init)
{
- u64 curr, end;
+ u64 curr, mask, dczid_bs, end1, end2, end3;
- if (!size)
- return;
+ /* Read DC G(Z)VA block size from the system register. */
+ dczid_bs = 4ul << (read_cpuid(DCZID_EL0) & 0xf);
curr = (u64)__tag_set(addr, tag);
- end = curr + size;
+ mask = dczid_bs - 1;
+ /* STG/STZG up to the end of the first block. */
+ end1 = curr | mask;
+ end3 = curr + size;
+ /* DC GVA / GZVA in [end1, end2) */
+ end2 = end3 & ~mask;
/*
- * 'asm volatile' is required to prevent the compiler to move
- * the statement outside of the loop.
+ * The following code uses STG on the first DC GVA block even if the
+ * start address is aligned - it appears to be faster than an alignment
+ * check + conditional branch. Also, if the range size is at least 2 DC
+ * GVA blocks, the first two loops can use post-condition to save one
+ * branch each.
*/
- if (init) {
- do {
- asm volatile(__MTE_PREAMBLE "stzg %0, [%0]"
- :
- : "r" (curr)
- : "memory");
- curr += MTE_GRANULE_SIZE;
- } while (curr != end);
- } else {
- do {
- asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
- :
- : "r" (curr)
- : "memory");
- curr += MTE_GRANULE_SIZE;
- } while (curr != end);
- }
+#define SET_MEMTAG_RANGE(stg_post, dc_gva) \
+ do { \
+ if (size >= 2 * dczid_bs) { \
+ do { \
+ curr = stg_post(curr); \
+ } while (curr < end1); \
+ \
+ do { \
+ dc_gva(curr); \
+ curr += dczid_bs; \
+ } while (curr < end2); \
+ } \
+ \
+ while (curr < end3) \
+ curr = stg_post(curr); \
+ } while (0)
+
+ if (init)
+ SET_MEMTAG_RANGE(__stzg_post, __dc_gzva);
+ else
+ SET_MEMTAG_RANGE(__stg_post, __dc_gva);
+#undef SET_MEMTAG_RANGE
}
void mte_enable_kernel_sync(void);
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7..58c7f80f5596 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -37,7 +37,8 @@ void mte_free_tag_storage(char *storage);
/* track which pages have valid allocation tags */
#define PG_mte_tagged PG_arch_2
-void mte_sync_tags(pte_t *ptep, pte_t pte);
+void mte_zero_clear_page_tags(void *addr);
+void mte_sync_tags(pte_t old_pte, pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
@@ -53,7 +54,10 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
#define PG_mte_tagged 0
-static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
+static inline void mte_zero_clear_page_tags(void *addr)
+{
+}
+static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index fcbef3eec4b2..993a27ea6f54 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -13,6 +13,7 @@
#ifndef __ASSEMBLY__
#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
+#include <linux/types.h> /* for gfp_t */
#include <asm/pgtable-types.h>
struct page;
@@ -28,9 +29,12 @@ void copy_user_highpage(struct page *to, struct page *from,
void copy_highpage(struct page *to, struct page *from);
#define __HAVE_ARCH_COPY_HIGHPAGE
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+ unsigned long vaddr);
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+
+void tag_clear_highpage(struct page *to);
+#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
new file mode 100644
index 000000000000..6bf5adc56295
--- /dev/null
+++ b/arch/arm64/include/asm/patching.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_PATCHING_H
+#define __ASM_PATCHING_H
+
+#include <linux/types.h>
+
+int aarch64_insn_read(void *addr, u32 *insnp);
+int aarch64_insn_write(void *addr, u32 insn);
+
+int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
+int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+
+#endif /* __ASM_PATCHING_H */
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 60731f602d3e..4ef6f19331f9 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -239,6 +239,11 @@
/* PMMIR_EL1.SLOTS mask */
#define ARMV8_PMU_SLOTS_MASK 0xff
+#define ARMV8_PMU_BUS_SLOTS_SHIFT 8
+#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
+#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
+#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+
#ifdef CONFIG_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index b82575a33f8b..40085e53f573 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -72,13 +72,6 @@
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
/*
- * Section address mask and size definitions.
- */
-#define SECTION_SHIFT PMD_SHIFT
-#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
-#define SECTION_MASK (~(SECTION_SIZE-1))
-
-/*
* Contiguous page definitions.
*/
#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 938092df76cf..7032f04c8ac6 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -55,7 +55,6 @@ extern bool arm64_use_ng_mappings;
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 25f5c04b43ce..508c7ffad515 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -312,9 +312,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
- if (system_supports_mte() &&
- pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
- mte_sync_tags(ptep, pte);
+ /*
+ * If the PTE would provide user space access to the tags associated
+ * with it then ensure that the MTE tags are synchronised. Although
+ * pte_access_permitted() returns false for exec only mappings, they
+ * don't expose tags (instruction fetches don't check tags).
+ */
+ if (system_supports_mte() && pte_access_permitted(pte, false) &&
+ !pte_special(pte)) {
+ pte_t old_pte = READ_ONCE(*ptep);
+ /*
+ * We only need to synchronise if the new PTE has tags enabled
+ * or if swapping in (in which case another mapping may have
+ * set tags in the past even if this PTE isn't tagged).
+ * (!pte_none() && !pte_present()) is an open coded version of
+ * is_swap_pte()
+ */
+ if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
+ mte_sync_tags(old_pte, pte);
+ }
__check_racy_pte_update(mm, ptep, pte);
@@ -509,13 +525,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_none(pmd) (!pmd_val(pmd))
-#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT))
-
#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_TABLE)
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
#define pmd_leaf(pmd) pmd_sect(pmd)
+#define pmd_bad(pmd) (!pmd_table(pmd))
#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
@@ -602,7 +617,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
+#define pud_bad(pud) (!pud_table(pud))
#define pud_present(pud) pte_present(pud_pte(pud))
#define pud_leaf(pud) pud_sect(pud)
#define pud_valid(pud) pte_valid(pud_pte(pud))
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index d50416be99be..28a78b67d9b4 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -31,10 +31,6 @@ struct ptrauth_keys_user {
struct ptrauth_key apga;
};
-struct ptrauth_keys_kernel {
- struct ptrauth_key apia;
-};
-
#define __ptrauth_key_install_nosync(k, v) \
do { \
struct ptrauth_key __pki_v = (v); \
@@ -42,6 +38,29 @@ do { \
write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \
} while (0)
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+
+struct ptrauth_keys_kernel {
+ struct ptrauth_key apia;
+};
+
+static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
+{
+ if (system_supports_address_auth())
+ get_random_bytes(&keys->apia, sizeof(keys->apia));
+}
+
+static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kernel *keys)
+{
+ if (!system_supports_address_auth())
+ return;
+
+ __ptrauth_key_install_nosync(APIA, keys->apia);
+ isb();
+}
+
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys)
{
if (system_supports_address_auth()) {
@@ -69,21 +88,6 @@ static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
ptrauth_keys_install_user(keys);
}
-static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
-{
- if (system_supports_address_auth())
- get_random_bytes(&keys->apia, sizeof(keys->apia));
-}
-
-static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kernel *keys)
-{
- if (!system_supports_address_auth())
- return;
-
- __ptrauth_key_install_nosync(APIA, keys->apia);
- isb();
-}
-
extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
@@ -121,11 +125,6 @@ static __always_inline void ptrauth_enable(void)
#define ptrauth_thread_switch_user(tsk) \
ptrauth_keys_install_user(&(tsk)->thread.keys_user)
-#define ptrauth_thread_init_kernel(tsk) \
- ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
-#define ptrauth_thread_switch_kernel(tsk) \
- ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
-
#else /* CONFIG_ARM64_PTR_AUTH */
#define ptrauth_enable()
#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL)
@@ -134,11 +133,19 @@ static __always_inline void ptrauth_enable(void)
#define ptrauth_strip_insn_pac(lr) (lr)
#define ptrauth_suspend_exit()
#define ptrauth_thread_init_user()
-#define ptrauth_thread_init_kernel(tsk)
#define ptrauth_thread_switch_user(tsk)
-#define ptrauth_thread_switch_kernel(tsk)
#endif /* CONFIG_ARM64_PTR_AUTH */
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+#define ptrauth_thread_init_kernel(tsk) \
+ ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
+#define ptrauth_thread_switch_kernel(tsk) \
+ ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
+#else
+#define ptrauth_thread_init_kernel(tsk)
+#define ptrauth_thread_switch_kernel(tsk)
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
#define PR_PAC_ENABLED_KEYS_MASK \
(PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
index 80e946b2abee..e83f0982b99c 100644
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -23,7 +23,7 @@ static inline void preempt_count_set(u64 pc)
} while (0)
#define init_idle_preempt_count(p, cpu) do { \
- task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+ task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
} while (0)
static inline void set_preempt_need_resched(void)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9df3feeee890..b6517fd03d7b 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -148,8 +148,10 @@ struct thread_struct {
struct debug_info debug; /* debugging */
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
struct ptrauth_keys_kernel keys_kernel;
#endif
+#endif
#ifdef CONFIG_ARM64_MTE
u64 gcr_user_excl;
#endif
@@ -257,8 +259,6 @@ void set_task_sctlr_el1(u64 sctlr);
extern struct task_struct *cpu_switch_to(struct task_struct *prev,
struct task_struct *next);
-asmlinkage void arm64_preempt_schedule_irq(void);
-
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
@@ -329,13 +329,13 @@ long get_tagged_addr_ctrl(struct task_struct *task);
* of header definitions for the use of task_stack_page.
*/
-#define current_top_of_stack() \
-({ \
- struct stack_info _info; \
- BUG_ON(!on_accessible_stack(current, current_stack_pointer, &_info)); \
- _info.high; \
+#define current_top_of_stack() \
+({ \
+ struct stack_info _info; \
+ BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info)); \
+ _info.high; \
})
-#define on_thread_stack() (on_task_stack(current, current_stack_pointer, NULL))
+#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL))
#endif /* __ASSEMBLY__ */
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index eaa2cd92e4c1..8297bccf0784 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,18 +9,18 @@
#ifdef CONFIG_SHADOW_CALL_STACK
scs_sp .req x18
- .macro scs_load tsk, tmp
+ .macro scs_load tsk
ldr scs_sp, [\tsk, #TSK_TI_SCS_SP]
.endm
- .macro scs_save tsk, tmp
+ .macro scs_save tsk
str scs_sp, [\tsk, #TSK_TI_SCS_SP]
.endm
#else
- .macro scs_load tsk, tmp
+ .macro scs_load tsk
.endm
- .macro scs_save tsk, tmp
+ .macro scs_save tsk
.endm
#endif /* CONFIG_SHADOW_CALL_STACK */
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 63e0b92a5fbb..7bea1d705dd6 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -37,13 +37,17 @@ struct sdei_registered_event;
asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
struct sdei_registered_event *arg);
+unsigned long do_sdei_event(struct pt_regs *regs,
+ struct sdei_registered_event *arg);
+
unsigned long sdei_arch_get_entry_point(int conduit);
#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
struct stack_info;
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info);
-static inline bool on_sdei_stack(unsigned long sp,
+bool _on_sdei_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info);
+static inline bool on_sdei_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (!IS_ENABLED(CONFIG_VMAP_STACK))
@@ -51,7 +55,7 @@ static inline bool on_sdei_stack(unsigned long sp,
if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
return false;
if (in_nmi())
- return _on_sdei_stack(sp, info);
+ return _on_sdei_stack(sp, size, info);
return false;
}
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 0e357757c0cc..fc55f5a57a06 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -73,12 +73,10 @@ asmlinkage void secondary_start_kernel(void);
/*
* Initial data for bringing up a secondary CPU.
- * @stack - sp for the secondary CPU
* @status - Result passed back from the secondary CPU to
* indicate failure.
*/
struct secondary_data {
- void *stack;
struct task_struct *task;
long status;
};
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4b33ca620679..1801399204d7 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -69,14 +69,14 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
-static inline bool on_stack(unsigned long sp, unsigned long low,
- unsigned long high, enum stack_type type,
- struct stack_info *info)
+static inline bool on_stack(unsigned long sp, unsigned long size,
+ unsigned long low, unsigned long high,
+ enum stack_type type, struct stack_info *info)
{
if (!low)
return false;
- if (sp < low || sp >= high)
+ if (sp < low || sp + size < sp || sp + size > high)
return false;
if (info) {
@@ -87,38 +87,38 @@ static inline bool on_stack(unsigned long sp, unsigned long low,
return true;
}
-static inline bool on_irq_stack(unsigned long sp,
+static inline bool on_irq_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
unsigned long high = low + IRQ_STACK_SIZE;
- return on_stack(sp, low, high, STACK_TYPE_IRQ, info);
+ return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info);
}
static inline bool on_task_stack(const struct task_struct *tsk,
- unsigned long sp,
+ unsigned long sp, unsigned long size,
struct stack_info *info)
{
unsigned long low = (unsigned long)task_stack_page(tsk);
unsigned long high = low + THREAD_SIZE;
- return on_stack(sp, low, high, STACK_TYPE_TASK, info);
+ return on_stack(sp, size, low, high, STACK_TYPE_TASK, info);
}
#ifdef CONFIG_VMAP_STACK
DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
-static inline bool on_overflow_stack(unsigned long sp,
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info)
{
unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
- return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info);
+ return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
}
#else
-static inline bool on_overflow_stack(unsigned long sp,
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
struct stack_info *info) { return false; }
#endif
@@ -128,21 +128,21 @@ static inline bool on_overflow_stack(unsigned long sp,
* context.
*/
static inline bool on_accessible_stack(const struct task_struct *tsk,
- unsigned long sp,
+ unsigned long sp, unsigned long size,
struct stack_info *info)
{
if (info)
info->type = STACK_TYPE_UNKNOWN;
- if (on_task_stack(tsk, sp, info))
+ if (on_task_stack(tsk, sp, size, info))
return true;
if (tsk != current || preemptible())
return false;
- if (on_irq_stack(sp, info))
+ if (on_irq_stack(sp, size, info))
return true;
- if (on_overflow_stack(sp, info))
+ if (on_overflow_stack(sp, size, info))
return true;
- if (on_sdei_stack(sp, info))
+ if (on_sdei_stack(sp, size, info))
return true;
return false;
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 65d15700a168..7b9c3acba684 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -651,7 +651,8 @@
#define INIT_SCTLR_EL2_MMU_ON \
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
- SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+ SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
+ SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
#define INIT_SCTLR_EL2_MMU_OFF \
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
@@ -703,9 +704,7 @@
/* MAIR_ELx memory attributes (used by Linux) */
#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00)
#define MAIR_ATTR_DEVICE_nGnRE UL(0x04)
-#define MAIR_ATTR_DEVICE_GRE UL(0x0c)
#define MAIR_ATTR_NORMAL_NC UL(0x44)
-#define MAIR_ATTR_NORMAL_WT UL(0xbb)
#define MAIR_ATTR_NORMAL_TAGGED UL(0xf0)
#define MAIR_ATTR_NORMAL UL(0xff)
#define MAIR_ATTR_MASK UL(0xff)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 61c97d3b58c7..c995d1f4594f 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -28,6 +28,10 @@ static void tlb_flush(struct mmu_gather *tlb);
*/
static inline int tlb_get_level(struct mmu_gather *tlb)
{
+ /* The TTL field is only valid for the leaf entry. */
+ if (tlb->freed_tables)
+ return 0;
+
if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
tlb->cleared_puds ||
tlb->cleared_p4ds))
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 5dab69d2c22b..99ffcafc736c 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -893,7 +893,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
#define __NR_mount_setattr 442
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-/* 443 is reserved for quotactl_path */
+#define __NR_quotactl_fd 443
+__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd)
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
#define __NR_landlock_add_rule 445
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 24223adae150..b3edde68bc3e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
__u32 reserved[12];
};
+struct kvm_arm_copy_mte_tags {
+ __u64 guest_ipa;
+ __u64 length;
+ void __user *addr;
+ __u64 flags;
+ __u64 reserved[2];
+};
+
+#define KVM_ARM_TAGS_TO_GUEST 0
+#define KVM_ARM_TAGS_FROM_GUEST 1
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6cc97730790e..cce308586fcc 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -14,15 +14,22 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
CFLAGS_syscall.o += -fno-stack-protector
+# It's not safe to invoke KCOV when portions of the kernel environment aren't
+# available or are out-of-sync with HW state. Since `noinstr` doesn't always
+# inhibit KCOV instrumentation, disable it for the entire compilation unit.
+KCOV_INSTRUMENT_entry.o := n
+KCOV_INSTRUMENT_idle.o := n
+
# Object file lists.
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-common.o entry-fpsimd.o process.o ptrace.o \
setup.o signal.o sys.o stacktrace.o time.o traps.o \
- io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \
+ io.o vdso.o hyp-stub.o psci.o cpu_ops.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o proton-pack.o idreg-override.o
+ syscall.o proton-pack.o idreg-override.o idle.o \
+ patching.o
targets += efi-entry.o
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index cada0b816c8a..f3851724fe35 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -239,6 +239,18 @@ done:
}
}
+static pgprot_t __acpi_get_writethrough_mem_attribute(void)
+{
+ /*
+ * Although UEFI specifies the use of Normal Write-through for
+ * EFI_MEMORY_WT, it is seldom used in practice and not implemented
+ * by most (all?) CPUs. Rather than allocate a MAIR just for this
+ * purpose, emit a warning and use Normal Non-cacheable instead.
+ */
+ pr_warn_once("No MAIR allocation for EFI_MEMORY_WT; treating as Normal Non-cacheable\n");
+ return __pgprot(PROT_NORMAL_NC);
+}
+
pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
{
/*
@@ -246,7 +258,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
* types" of UEFI 2.5 section 2.3.6.1, each EFI memory type is
* mapped to a corresponding MAIR attribute encoding.
* The EFI memory attribute advises all possible capabilities
- * of a memory region. We use the most efficient capability.
+ * of a memory region.
*/
u64 attr;
@@ -254,10 +266,10 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
attr = efi_mem_attributes(addr);
if (attr & EFI_MEMORY_WB)
return PAGE_KERNEL;
- if (attr & EFI_MEMORY_WT)
- return __pgprot(PROT_NORMAL_WT);
if (attr & EFI_MEMORY_WC)
return __pgprot(PROT_NORMAL_NC);
+ if (attr & EFI_MEMORY_WT)
+ return __acpi_get_writethrough_mem_attribute();
return __pgprot(PROT_DEVICE_nGnRnE);
}
@@ -340,10 +352,10 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
default:
if (region->attribute & EFI_MEMORY_WB)
prot = PAGE_KERNEL;
- else if (region->attribute & EFI_MEMORY_WT)
- prot = __pgprot(PROT_NORMAL_WT);
else if (region->attribute & EFI_MEMORY_WC)
prot = __pgprot(PROT_NORMAL_NC);
+ else if (region->attribute & EFI_MEMORY_WT)
+ prot = __acpi_get_writethrough_mem_attribute();
}
}
return __ioremap(phys, size, prot);
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index c906d20c7b52..3fb79b76e9d9 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -181,7 +181,7 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu
*/
if (!is_module) {
dsb(ish);
- __flush_icache_all();
+ icache_inval_all_pou();
isb();
/* Ignore ARM64_CB bit from feature mask */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e73..c85670692afa 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -27,6 +27,7 @@
int main(void)
{
DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
+ DEFINE(TSK_CPU, offsetof(struct task_struct, cpu));
BLANK();
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
@@ -46,6 +47,8 @@ int main(void)
DEFINE(THREAD_SCTLR_USER, offsetof(struct task_struct, thread.sctlr_user));
#ifdef CONFIG_ARM64_PTR_AUTH
DEFINE(THREAD_KEYS_USER, offsetof(struct task_struct, thread.keys_user));
+#endif
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel));
#endif
#ifdef CONFIG_ARM64_MTE
@@ -99,7 +102,6 @@ int main(void)
DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
BLANK();
- DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
@@ -111,6 +113,8 @@ int main(void)
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
+ DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
+ DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
@@ -138,6 +142,15 @@ int main(void)
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id));
DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state));
+ DEFINE(ARM_SMCCC_1_2_REGS_X0_OFFS, offsetof(struct arm_smccc_1_2_regs, a0));
+ DEFINE(ARM_SMCCC_1_2_REGS_X2_OFFS, offsetof(struct arm_smccc_1_2_regs, a2));
+ DEFINE(ARM_SMCCC_1_2_REGS_X4_OFFS, offsetof(struct arm_smccc_1_2_regs, a4));
+ DEFINE(ARM_SMCCC_1_2_REGS_X6_OFFS, offsetof(struct arm_smccc_1_2_regs, a6));
+ DEFINE(ARM_SMCCC_1_2_REGS_X8_OFFS, offsetof(struct arm_smccc_1_2_regs, a8));
+ DEFINE(ARM_SMCCC_1_2_REGS_X10_OFFS, offsetof(struct arm_smccc_1_2_regs, a10));
+ DEFINE(ARM_SMCCC_1_2_REGS_X12_OFFS, offsetof(struct arm_smccc_1_2_regs, a12));
+ DEFINE(ARM_SMCCC_1_2_REGS_X14_OFFS, offsetof(struct arm_smccc_1_2_regs, a14));
+ DEFINE(ARM_SMCCC_1_2_REGS_X16_OFFS, offsetof(struct arm_smccc_1_2_regs, a16));
BLANK();
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
@@ -153,7 +166,9 @@ int main(void)
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
DEFINE(PTRAUTH_USER_KEY_APIA, offsetof(struct ptrauth_keys_user, apia));
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia));
+#endif
BLANK();
#endif
return 0;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index efed2830d141..125d5c9471ac 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -76,6 +76,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
+#include <asm/insn.h>
#include <asm/kvm_host.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
@@ -108,6 +109,24 @@ bool arm64_use_ng_mappings = false;
EXPORT_SYMBOL(arm64_use_ng_mappings);
/*
+ * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs
+ * support it?
+ */
+static bool __read_mostly allow_mismatched_32bit_el0;
+
+/*
+ * Static branch enabled only if allow_mismatched_32bit_el0 is set and we have
+ * seen at least one CPU capable of 32-bit EL0.
+ */
+DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
+/*
+ * Mask of CPUs supporting 32-bit EL0.
+ * Only valid if arm64_mismatched_32bit_el0 is enabled.
+ */
+static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly;
+
+/*
* Flag to indicate if we have computed the system wide
* capabilities based on the boot time active CPUs. This
* will be used to determine if a new booting CPU should
@@ -400,6 +419,11 @@ static const struct arm64_ftr_bits ftr_dczid[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_gmid[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_isar0[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0),
@@ -617,6 +641,9 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 1, CRm = 2 */
ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
+ /* Op1 = 1, CRn = 0, CRm = 0 */
+ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
+
/* Op1 = 3, CRn = 0, CRm = 0 */
{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
@@ -767,7 +794,7 @@ static void __init sort_ftr_regs(void)
* Any bits that are not covered by an arm64_ftr_bits entry are considered
* RES0 for the system-wide value, and must strictly match.
*/
-static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
+static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
{
u64 val = 0;
u64 strict_mask = ~0x0ULL;
@@ -863,6 +890,31 @@ static void __init init_cpu_hwcaps_indirect_list(void)
static void __init setup_boot_cpu_capabilities(void);
+static void init_32bit_cpu_features(struct cpuinfo_32bit *info)
+{
+ init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+ init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
+ init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+ init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+ init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+ init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+ init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+ init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+ init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
+ init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+ init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+ init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+ init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+ init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
+ init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
+ init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+ init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+ init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
+ init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+ init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+ init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+}
+
void __init init_cpu_features(struct cpuinfo_arm64 *info)
{
/* Before we start using the tables, make sure it is sorted */
@@ -882,35 +934,17 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
- if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
- init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
- init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
- init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
- init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
- init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
- init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
- init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
- init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
- init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
- init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
- init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
- init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
- init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
- init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
- init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
- init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
- init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
- init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
- init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
- init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
- init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
- }
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
+ init_32bit_cpu_features(&info->aarch32);
if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
sve_init_vq_map();
}
+ if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+ init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
+
/*
* Initialize the indirect array of CPU hwcaps capabilities pointers
* before we handle the boot CPU below.
@@ -975,21 +1009,29 @@ static void relax_cpu_ftr_reg(u32 sys_id, int field)
WARN_ON(!ftrp->width);
}
-static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info,
- struct cpuinfo_arm64 *boot)
+static void lazy_init_32bit_cpu_features(struct cpuinfo_arm64 *info,
+ struct cpuinfo_arm64 *boot)
+{
+ static bool boot_cpu_32bit_regs_overridden = false;
+
+ if (!allow_mismatched_32bit_el0 || boot_cpu_32bit_regs_overridden)
+ return;
+
+ if (id_aa64pfr0_32bit_el0(boot->reg_id_aa64pfr0))
+ return;
+
+ boot->aarch32 = info->aarch32;
+ init_32bit_cpu_features(&boot->aarch32);
+ boot_cpu_32bit_regs_overridden = true;
+}
+
+static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info,
+ struct cpuinfo_32bit *boot)
{
int taint = 0;
u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
/*
- * If we don't have AArch32 at all then skip the checks entirely
- * as the register values may be UNKNOWN and we're not going to be
- * using them for anything.
- */
- if (!id_aa64pfr0_32bit_el0(pfr0))
- return taint;
-
- /*
* If we don't have AArch32 at EL1, then relax the strictness of
* EL1-dependent register fields to avoid spurious sanity check fails.
*/
@@ -1135,10 +1177,29 @@ void update_cpu_features(int cpu,
}
/*
+ * The kernel uses the LDGM/STGM instructions and the number of tags
+ * they read/write depends on the GMID_EL1.BS field. Check that the
+ * value is the same on all CPUs.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_MTE) &&
+ id_aa64pfr1_mte(info->reg_id_aa64pfr1)) {
+ taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu,
+ info->reg_gmid, boot->reg_gmid);
+ }
+
+ /*
+ * If we don't have AArch32 at all then skip the checks entirely
+ * as the register values may be UNKNOWN and we're not going to be
+ * using them for anything.
+ *
* This relies on a sanitised view of the AArch64 ID registers
* (e.g. SYS_ID_AA64PFR0_EL1), so we call it last.
*/
- taint |= update_32bit_cpu_features(cpu, info, boot);
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ lazy_init_32bit_cpu_features(info, boot);
+ taint |= update_32bit_cpu_features(cpu, &info->aarch32,
+ &boot->aarch32);
+ }
/*
* Mismatched CPU features are a recipe for disaster. Don't even
@@ -1248,6 +1309,28 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
return feature_matches(val, entry);
}
+const struct cpumask *system_32bit_el0_cpumask(void)
+{
+ if (!system_supports_32bit_el0())
+ return cpu_none_mask;
+
+ if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+ return cpu_32bit_el0_mask;
+
+ return cpu_possible_mask;
+}
+
+static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ if (!has_cpuid_feature(entry, scope))
+ return allow_mismatched_32bit_el0;
+
+ if (scope == SCOPE_SYSTEM)
+ pr_info("detected: 32-bit EL0 Support\n");
+
+ return true;
+}
+
static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope)
{
bool has_sre;
@@ -1866,10 +1949,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_copy_el2regs,
},
{
- .desc = "32-bit EL0 Support",
- .capability = ARM64_HAS_32BIT_EL0,
+ .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = has_cpuid_feature,
+ .matches = has_32bit_el0,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_EL0_SHIFT,
@@ -2378,7 +2460,7 @@ static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
{},
};
-static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
+static void cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
{
switch (cap->hwcap_type) {
case CAP_HWCAP:
@@ -2423,7 +2505,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
return rc;
}
-static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
+static void setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
{
/* We support emulation of accesses to CPU ID feature registers */
cpu_set_named_feature(CPUID);
@@ -2598,7 +2680,7 @@ static void check_early_cpu_features(void)
}
static void
-verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
+__verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
{
for (; caps->matches; caps++)
@@ -2609,6 +2691,14 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
}
}
+static void verify_local_elf_hwcaps(void)
+{
+ __verify_local_elf_hwcaps(arm64_elf_hwcaps);
+
+ if (id_aa64pfr0_32bit_el0(read_cpuid(ID_AA64PFR0_EL1)))
+ __verify_local_elf_hwcaps(compat_elf_hwcaps);
+}
+
static void verify_sve_features(void)
{
u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
@@ -2673,11 +2763,7 @@ static void verify_local_cpu_capabilities(void)
* on all secondary CPUs.
*/
verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU);
-
- verify_local_elf_hwcaps(arm64_elf_hwcaps);
-
- if (system_supports_32bit_el0())
- verify_local_elf_hwcaps(compat_elf_hwcaps);
+ verify_local_elf_hwcaps();
if (system_supports_sve())
verify_sve_features();
@@ -2812,6 +2898,34 @@ void __init setup_cpu_features(void)
ARCH_DMA_MINALIGN);
}
+static int enable_mismatched_32bit_el0(unsigned int cpu)
+{
+ struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
+ bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
+
+ if (cpu_32bit) {
+ cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
+ static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0);
+ setup_elf_hwcaps(compat_elf_hwcaps);
+ }
+
+ return 0;
+}
+
+static int __init init_32bit_el0_mask(void)
+{
+ if (!allow_mismatched_32bit_el0)
+ return 0;
+
+ if (!zalloc_cpumask_var(&cpu_32bit_el0_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "arm64/mismatched_32bit_el0:online",
+ enable_mismatched_32bit_el0, NULL);
+}
+subsys_initcall_sync(init_32bit_el0_mask);
+
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
@@ -2905,8 +3019,8 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn)
}
static struct undef_hook mrs_hook = {
- .instr_mask = 0xfff00000,
- .instr_val = 0xd5300000,
+ .instr_mask = 0xffff0000,
+ .instr_val = 0xd5380000,
.pstate_mask = PSR_AA32_MODE_MASK,
.pstate_val = PSR_MODE_EL0t,
.fn = emulate_mrs,
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 51fcf99d5351..87731fea5e41 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -246,7 +246,7 @@ static struct kobj_type cpuregs_kobj_type = {
struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \
\
if (info->reg_midr) \
- return sprintf(buf, "0x%016x\n", info->reg_##_field); \
+ return sprintf(buf, "0x%016llx\n", info->reg_##_field); \
else \
return 0; \
} \
@@ -344,6 +344,32 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
}
+static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info)
+{
+ info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+ info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
+ info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+ info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+ info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+ info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+ info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+ info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+ info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
+ info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+ info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+ info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+ info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+ info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
+ info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
+ info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+ info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+ info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
+
+ info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
+ info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
+ info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+}
+
static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
{
info->reg_cntfrq = arch_timer_get_cntfrq();
@@ -371,31 +397,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
- /* Update the 32bit ID registers only if AArch32 is implemented */
- if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
- info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
- info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
- info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
- info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
- info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
- info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
- info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
- info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
- info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
- info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
- info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
- info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
- info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
- info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
- info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
- info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
- info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
- info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
-
- info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
- info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
- info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
- }
+ if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+ info->reg_gmid = read_cpuid(GMID_EL1);
+
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
+ __cpuinfo_store_cpu_32bit(&info->aarch32);
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(info->reg_id_aa64pfr0))
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 0073b24b5d25..61a87fa1c305 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -28,7 +28,8 @@ SYM_CODE_START(efi_enter_kernel)
* stale icache entries from before relocation.
*/
ldr w1, =kernel_size
- bl __clean_dcache_area_poc
+ add x1, x0, x1
+ bl dcache_clean_poc
ic ialluis
/*
@@ -36,8 +37,8 @@ SYM_CODE_START(efi_enter_kernel)
* so that we can safely disable the MMU and caches.
*/
adr x0, 0f
- ldr w1, 3f
- bl __clean_dcache_area_poc
+ adr x1, 3f
+ bl dcache_clean_poc
0:
/* Turn off Dcache and MMU */
mrs x0, CurrentEL
@@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel)
mov x2, xzr
mov x3, xzr
br x19
+3:
SYM_CODE_END(efi_enter_kernel)
-3: .long . - 0b
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 340d04e13617..12ce14a98b7c 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,7 +6,11 @@
*/
#include <linux/context_tracking.h>
+#include <linux/linkage.h>
+#include <linux/lockdep.h>
#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/thread_info.h>
#include <asm/cpufeature.h>
@@ -15,7 +19,11 @@
#include <asm/exception.h>
#include <asm/kprobes.h>
#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/sdei.h>
+#include <asm/stacktrace.h>
#include <asm/sysreg.h>
+#include <asm/system_misc.h>
/*
* This is intended to match the logic in irqentry_enter(), handling the kernel
@@ -67,7 +75,7 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
}
}
-void noinstr arm64_enter_nmi(struct pt_regs *regs)
+static void noinstr arm64_enter_nmi(struct pt_regs *regs)
{
regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -80,7 +88,7 @@ void noinstr arm64_enter_nmi(struct pt_regs *regs)
ftrace_nmi_enter();
}
-void noinstr arm64_exit_nmi(struct pt_regs *regs)
+static void noinstr arm64_exit_nmi(struct pt_regs *regs)
{
bool restore = regs->lockdep_hardirqs;
@@ -97,7 +105,7 @@ void noinstr arm64_exit_nmi(struct pt_regs *regs)
__nmi_exit();
}
-asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
+static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
arm64_enter_nmi(regs);
@@ -105,7 +113,7 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
enter_from_kernel_mode(regs);
}
-asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
+static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
arm64_exit_nmi(regs);
@@ -113,6 +121,65 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
exit_to_kernel_mode(regs);
}
+static void __sched arm64_preempt_schedule_irq(void)
+{
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
+ * priority masking is used the GIC irqchip driver will clear DAIF.IF
+ * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
+ * DAIF we must have handled an NMI, so skip preemption.
+ */
+ if (system_uses_irq_prio_masking() && read_sysreg(daif))
+ return;
+
+ /*
+ * Preempting a task from an IRQ means we leave copies of PSTATE
+ * on the stack. cpufeature's enable calls may modify PSTATE, but
+ * resuming one of these preempted tasks would undo those changes.
+ *
+ * Only allow a task to be preempted once cpufeatures have been
+ * enabled.
+ */
+ if (system_capabilities_finalized())
+ preempt_schedule_irq();
+}
+
+static void do_interrupt_handler(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ if (on_thread_stack())
+ call_on_irq_stack(regs, handler);
+ else
+ handler(regs);
+}
+
+extern void (*handle_arch_irq)(struct pt_regs *);
+extern void (*handle_arch_fiq)(struct pt_regs *);
+
+static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
+ unsigned int esr)
+{
+ arm64_enter_nmi(regs);
+
+ console_verbose();
+
+ pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n",
+ vector, smp_processor_id(), esr,
+ esr_get_class_string(esr));
+
+ __show_regs(regs);
+ panic("Unhandled exception");
+}
+
+#define UNHANDLED(el, regsize, vector) \
+asmlinkage void noinstr el##_##regsize##_##vector##_handler(struct pt_regs *regs) \
+{ \
+ const char *desc = #regsize "-bit " #el " " #vector; \
+ __panic_unhandled(regs, desc, read_sysreg(esr_el1)); \
+}
+
#ifdef CONFIG_ARM64_ERRATUM_1463225
static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
@@ -162,6 +229,11 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
}
#endif /* CONFIG_ARM64_ERRATUM_1463225 */
+UNHANDLED(el1t, 64, sync)
+UNHANDLED(el1t, 64, irq)
+UNHANDLED(el1t, 64, fiq)
+UNHANDLED(el1t, 64, error)
+
static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@@ -193,15 +265,6 @@ static void noinstr el1_undef(struct pt_regs *regs)
exit_to_kernel_mode(regs);
}
-static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
-{
- enter_from_kernel_mode(regs);
- local_daif_inherit(regs);
- bad_mode(regs, 0, esr);
- local_daif_mask();
- exit_to_kernel_mode(regs);
-}
-
static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
{
regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -245,7 +308,7 @@ static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
exit_to_kernel_mode(regs);
}
-asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
@@ -275,10 +338,50 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
el1_fpac(regs, esr);
break;
default:
- el1_inv(regs, esr);
+ __panic_unhandled(regs, "64-bit el1h sync", esr);
}
}
+static void noinstr el1_interrupt(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+ enter_el1_irq_or_nmi(regs);
+ do_interrupt_handler(regs, handler);
+
+ /*
+ * Note: thread_info::preempt_count includes both thread_info::count
+ * and thread_info::need_resched, and is not equivalent to
+ * preempt_count().
+ */
+ if (IS_ENABLED(CONFIG_PREEMPTION) &&
+ READ_ONCE(current_thread_info()->preempt_count) == 0)
+ arm64_preempt_schedule_irq();
+
+ exit_el1_irq_or_nmi(regs);
+}
+
+asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs)
+{
+ el1_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
+{
+ el1_interrupt(regs, handle_arch_fiq);
+}
+
+asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ local_daif_restore(DAIF_ERRCTX);
+ arm64_enter_nmi(regs);
+ do_serror(regs, esr);
+ arm64_exit_nmi(regs);
+}
+
asmlinkage void noinstr enter_from_user_mode(void)
{
lockdep_hardirqs_off(CALLER_ADDR0);
@@ -398,7 +501,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
enter_from_user_mode();
do_debug_exception(far, esr, regs);
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ local_daif_restore(DAIF_PROCCTX);
}
static void noinstr el0_svc(struct pt_regs *regs)
@@ -415,7 +518,7 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
do_ptrauth_fault(regs, esr);
}
-asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
@@ -468,6 +571,56 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
}
}
+static void noinstr el0_interrupt(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ enter_from_user_mode();
+
+ write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+ if (regs->pc & BIT(55))
+ arm64_apply_bp_hardening();
+
+ do_interrupt_handler(regs, handler);
+}
+
+static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
+{
+ el0_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs)
+{
+ __el0_irq_handler_common(regs);
+}
+
+static void noinstr __el0_fiq_handler_common(struct pt_regs *regs)
+{
+ el0_interrupt(regs, handle_arch_fiq);
+}
+
+asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
+{
+ __el0_fiq_handler_common(regs);
+}
+
+static void __el0_error_handler_common(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ enter_from_user_mode();
+ local_daif_restore(DAIF_ERRCTX);
+ arm64_enter_nmi(regs);
+ do_serror(regs, esr);
+ arm64_exit_nmi(regs);
+ local_daif_restore(DAIF_PROCCTX);
+}
+
+asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
+{
+ __el0_error_handler_common(regs);
+}
+
#ifdef CONFIG_COMPAT
static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
{
@@ -483,7 +636,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
do_el0_svc_compat(regs);
}
-asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
@@ -526,4 +679,71 @@ asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
el0_inv(regs, esr);
}
}
+
+asmlinkage void noinstr el0t_32_irq_handler(struct pt_regs *regs)
+{
+ __el0_irq_handler_common(regs);
+}
+
+asmlinkage void noinstr el0t_32_fiq_handler(struct pt_regs *regs)
+{
+ __el0_fiq_handler_common(regs);
+}
+
+asmlinkage void noinstr el0t_32_error_handler(struct pt_regs *regs)
+{
+ __el0_error_handler_common(regs);
+}
+#else /* CONFIG_COMPAT */
+UNHANDLED(el0t, 32, sync)
+UNHANDLED(el0t, 32, irq)
+UNHANDLED(el0t, 32, fiq)
+UNHANDLED(el0t, 32, error)
#endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_VMAP_STACK
+asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
+{
+ unsigned int esr = read_sysreg(esr_el1);
+ unsigned long far = read_sysreg(far_el1);
+
+ arm64_enter_nmi(regs);
+ panic_bad_stack(regs, esr, far);
+}
+#endif /* CONFIG_VMAP_STACK */
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+asmlinkage noinstr unsigned long
+__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
+{
+ unsigned long ret;
+
+ /*
+ * We didn't take an exception to get here, so the HW hasn't
+ * set/cleared bits in PSTATE that we may rely on.
+ *
+ * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
+ * whether PSTATE bits are inherited unchanged or generated from
+ * scratch, and the TF-A implementation always clears PAN and always
+ * clears UAO. There are no other known implementations.
+ *
+ * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
+ * PSTATE is modified upon architectural exceptions, and so PAN is
+ * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
+ * cleared.
+ *
+ * We must explicitly reset PAN to the expected state, including
+ * clearing it when the host isn't using it, in case a VM had it set.
+ */
+ if (system_uses_hw_pan())
+ set_pstate_pan(1);
+ else if (cpu_has_pan())
+ set_pstate_pan(0);
+
+ arm64_enter_nmi(regs);
+ ret = do_sdei_event(regs, arg);
+ arm64_exit_nmi(regs);
+
+ return ret;
+}
+#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 3ecec60d3295..0a7a64753878 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -63,16 +63,24 @@ SYM_FUNC_END(sve_set_vq)
* and the rest zeroed. All the other SVE registers will be zeroed.
*/
SYM_FUNC_START(sve_load_from_fpsimd_state)
- sve_load_vq x1, x2, x3
- fpsimd_restore x0, 8
- _for n, 0, 15, _sve_pfalse \n
- _sve_wrffr 0
- ret
+ sve_load_vq x1, x2, x3
+ fpsimd_restore x0, 8
+ sve_flush_p_ffr
+ ret
SYM_FUNC_END(sve_load_from_fpsimd_state)
-/* Zero all SVE registers but the first 128-bits of each vector */
+/*
+ * Zero all SVE registers but the first 128-bits of each vector
+ *
+ * VQ must already be configured by caller, any further updates of VQ
+ * will need to ensure that the register state remains valid.
+ *
+ * x0 = VQ - 1
+ */
SYM_FUNC_START(sve_flush_live)
- sve_flush
+ cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
+ sve_flush_z
+1: sve_flush_p_ffr
ret
SYM_FUNC_END(sve_flush_live)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 3513984a88bd..863d44f73028 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -33,12 +33,6 @@
* Context tracking and irqflag tracing need to instrument transitions between
* user and kernel mode.
*/
- .macro user_exit_irqoff
-#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
- bl enter_from_user_mode
-#endif
- .endm
-
.macro user_enter_irqoff
#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
bl exit_to_user_mode
@@ -51,16 +45,7 @@
.endr
.endm
-/*
- * Bad Abort numbers
- *-----------------
- */
-#define BAD_SYNC 0
-#define BAD_IRQ 1
-#define BAD_FIQ 2
-#define BAD_ERROR 3
-
- .macro kernel_ventry, el, label, regsize = 64
+ .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
.align 7
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
.if \el == 0
@@ -87,7 +72,7 @@ alternative_else_nop_endif
tbnz x0, #THREAD_SHIFT, 0f
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
- b el\()\el\()_\label
+ b el\el\ht\()_\regsize\()_\label
0:
/*
@@ -119,7 +104,7 @@ alternative_else_nop_endif
sub sp, sp, x0
mrs x0, tpidrro_el0
#endif
- b el\()\el\()_\label
+ b el\el\ht\()_\regsize\()_\label
.endm
.macro tramp_alias, dst, sym
@@ -275,7 +260,7 @@ alternative_else_nop_endif
mte_set_kernel_gcr x22, x23
- scs_load tsk, x20
+ scs_load tsk
.else
add x21, sp, #PT_REGS_SIZE
get_current_task tsk
@@ -285,7 +270,7 @@ alternative_else_nop_endif
stp lr, x21, [sp, #S_LR]
/*
- * For exceptions from EL0, create a terminal frame record.
+ * For exceptions from EL0, create a final frame record.
* For exceptions from EL1, create a synthetic frame record so the
* interrupted code shows up in the backtrace.
*/
@@ -375,7 +360,7 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
3:
- scs_save tsk, x0
+ scs_save tsk
#ifdef CONFIG_ARM64_PTR_AUTH
alternative_if ARM64_HAS_ADDRESS_AUTH
@@ -486,63 +471,12 @@ SYM_CODE_START_LOCAL(__swpan_exit_el0)
SYM_CODE_END(__swpan_exit_el0)
#endif
- .macro irq_stack_entry
- mov x19, sp // preserve the original sp
-#ifdef CONFIG_SHADOW_CALL_STACK
- mov x24, scs_sp // preserve the original shadow stack
-#endif
-
- /*
- * Compare sp with the base of the task stack.
- * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
- * and should switch to the irq stack.
- */
- ldr x25, [tsk, TSK_STACK]
- eor x25, x25, x19
- and x25, x25, #~(THREAD_SIZE - 1)
- cbnz x25, 9998f
-
- ldr_this_cpu x25, irq_stack_ptr, x26
- mov x26, #IRQ_STACK_SIZE
- add x26, x25, x26
-
- /* switch to the irq stack */
- mov sp, x26
-
-#ifdef CONFIG_SHADOW_CALL_STACK
- /* also switch to the irq shadow stack */
- ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x26
-#endif
-
-9998:
- .endm
-
- /*
- * The callee-saved regs (x19-x29) should be preserved between
- * irq_stack_entry and irq_stack_exit, but note that kernel_entry
- * uses x20-x23 to store data for later use.
- */
- .macro irq_stack_exit
- mov sp, x19
-#ifdef CONFIG_SHADOW_CALL_STACK
- mov scs_sp, x24
-#endif
- .endm
-
/* GPRs used by entry code */
tsk .req x28 // current thread_info
/*
* Interrupt handling.
*/
- .macro irq_handler, handler:req
- ldr_l x1, \handler
- mov x0, sp
- irq_stack_entry
- blr x1
- irq_stack_exit
- .endm
-
.macro gic_prio_kentry_setup, tmp:req
#ifdef CONFIG_ARM64_PSEUDO_NMI
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
@@ -552,45 +486,6 @@ tsk .req x28 // current thread_info
#endif
.endm
- .macro el1_interrupt_handler, handler:req
- enable_da
-
- mov x0, sp
- bl enter_el1_irq_or_nmi
-
- irq_handler \handler
-
-#ifdef CONFIG_PREEMPTION
- ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- /*
- * DA were cleared at start of handling, and IF are cleared by
- * the GIC irqchip driver using gic_arch_enable_irqs() for
- * normal IRQs. If anything is set, it means we come back from
- * an NMI instead of a normal IRQ, so skip preemption
- */
- mrs x0, daif
- orr x24, x24, x0
-alternative_else_nop_endif
- cbnz x24, 1f // preempt count != 0 || NMI return path
- bl arm64_preempt_schedule_irq // irq en/disable is done inside
-1:
-#endif
-
- mov x0, sp
- bl exit_el1_irq_or_nmi
- .endm
-
- .macro el0_interrupt_handler, handler:req
- user_exit_irqoff
- enable_da
-
- tbz x22, #55, 1f
- bl do_el0_irq_bp_hardening
-1:
- irq_handler \handler
- .endm
-
.text
/*
@@ -600,32 +495,25 @@ alternative_else_nop_endif
.align 11
SYM_CODE_START(vectors)
- kernel_ventry 1, sync_invalid // Synchronous EL1t
- kernel_ventry 1, irq_invalid // IRQ EL1t
- kernel_ventry 1, fiq_invalid // FIQ EL1t
- kernel_ventry 1, error_invalid // Error EL1t
-
- kernel_ventry 1, sync // Synchronous EL1h
- kernel_ventry 1, irq // IRQ EL1h
- kernel_ventry 1, fiq // FIQ EL1h
- kernel_ventry 1, error // Error EL1h
-
- kernel_ventry 0, sync // Synchronous 64-bit EL0
- kernel_ventry 0, irq // IRQ 64-bit EL0
- kernel_ventry 0, fiq // FIQ 64-bit EL0
- kernel_ventry 0, error // Error 64-bit EL0
-
-#ifdef CONFIG_COMPAT
- kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
- kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
- kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0
- kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
-#else
- kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
- kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
- kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
- kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
-#endif
+ kernel_ventry 1, t, 64, sync // Synchronous EL1t
+ kernel_ventry 1, t, 64, irq // IRQ EL1t
+ kernel_ventry 1, t, 64, fiq // FIQ EL1h
+ kernel_ventry 1, t, 64, error // Error EL1t
+
+ kernel_ventry 1, h, 64, sync // Synchronous EL1h
+ kernel_ventry 1, h, 64, irq // IRQ EL1h
+ kernel_ventry 1, h, 64, fiq // FIQ EL1h
+ kernel_ventry 1, h, 64, error // Error EL1h
+
+ kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0
+ kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0
+ kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0
+ kernel_ventry 0, t, 64, error // Error 64-bit EL0
+
+ kernel_ventry 0, t, 32, sync // Synchronous 32-bit EL0
+ kernel_ventry 0, t, 32, irq // IRQ 32-bit EL0
+ kernel_ventry 0, t, 32, fiq // FIQ 32-bit EL0
+ kernel_ventry 0, t, 32, error // Error 32-bit EL0
SYM_CODE_END(vectors)
#ifdef CONFIG_VMAP_STACK
@@ -656,147 +544,46 @@ __bad_stack:
ASM_BUG()
#endif /* CONFIG_VMAP_STACK */
-/*
- * Invalid mode handlers
- */
- .macro inv_entry, el, reason, regsize = 64
+
+ .macro entry_handler el:req, ht:req, regsize:req, label:req
+SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
kernel_entry \el, \regsize
mov x0, sp
- mov x1, #\reason
- mrs x2, esr_el1
- bl bad_mode
- ASM_BUG()
+ bl el\el\ht\()_\regsize\()_\label\()_handler
+ .if \el == 0
+ b ret_to_user
+ .else
+ b ret_to_kernel
+ .endif
+SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
.endm
-SYM_CODE_START_LOCAL(el0_sync_invalid)
- inv_entry 0, BAD_SYNC
-SYM_CODE_END(el0_sync_invalid)
-
-SYM_CODE_START_LOCAL(el0_irq_invalid)
- inv_entry 0, BAD_IRQ
-SYM_CODE_END(el0_irq_invalid)
-
-SYM_CODE_START_LOCAL(el0_fiq_invalid)
- inv_entry 0, BAD_FIQ
-SYM_CODE_END(el0_fiq_invalid)
-
-SYM_CODE_START_LOCAL(el0_error_invalid)
- inv_entry 0, BAD_ERROR
-SYM_CODE_END(el0_error_invalid)
-
-SYM_CODE_START_LOCAL(el1_sync_invalid)
- inv_entry 1, BAD_SYNC
-SYM_CODE_END(el1_sync_invalid)
-
-SYM_CODE_START_LOCAL(el1_irq_invalid)
- inv_entry 1, BAD_IRQ
-SYM_CODE_END(el1_irq_invalid)
-
-SYM_CODE_START_LOCAL(el1_fiq_invalid)
- inv_entry 1, BAD_FIQ
-SYM_CODE_END(el1_fiq_invalid)
-
-SYM_CODE_START_LOCAL(el1_error_invalid)
- inv_entry 1, BAD_ERROR
-SYM_CODE_END(el1_error_invalid)
-
/*
- * EL1 mode handlers.
+ * Early exception handlers
*/
- .align 6
-SYM_CODE_START_LOCAL_NOALIGN(el1_sync)
- kernel_entry 1
- mov x0, sp
- bl el1_sync_handler
- kernel_exit 1
-SYM_CODE_END(el1_sync)
-
- .align 6
-SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
- kernel_entry 1
- el1_interrupt_handler handle_arch_irq
- kernel_exit 1
-SYM_CODE_END(el1_irq)
-
-SYM_CODE_START_LOCAL_NOALIGN(el1_fiq)
- kernel_entry 1
- el1_interrupt_handler handle_arch_fiq
- kernel_exit 1
-SYM_CODE_END(el1_fiq)
-
-/*
- * EL0 mode handlers.
- */
- .align 6
-SYM_CODE_START_LOCAL_NOALIGN(el0_sync)
- kernel_entry 0
- mov x0, sp
- bl el0_sync_handler
- b ret_to_user
-SYM_CODE_END(el0_sync)
-
-#ifdef CONFIG_COMPAT
- .align 6
-SYM_CODE_START_LOCAL_NOALIGN(el0_sync_compat)
- kernel_entry 0, 32
- mov x0, sp
- bl el0_sync_compat_handler
- b ret_to_user
-SYM_CODE_END(el0_sync_compat)
-
- .align 6
-SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat)
- kernel_entry 0, 32
- b el0_irq_naked
-SYM_CODE_END(el0_irq_compat)
-
-SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat)
- kernel_entry 0, 32
- b el0_fiq_naked
-SYM_CODE_END(el0_fiq_compat)
-
-SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
- kernel_entry 0, 32
- b el0_error_naked
-SYM_CODE_END(el0_error_compat)
-#endif
-
- .align 6
-SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
- kernel_entry 0
-el0_irq_naked:
- el0_interrupt_handler handle_arch_irq
- b ret_to_user
-SYM_CODE_END(el0_irq)
-
-SYM_CODE_START_LOCAL_NOALIGN(el0_fiq)
- kernel_entry 0
-el0_fiq_naked:
- el0_interrupt_handler handle_arch_fiq
- b ret_to_user
-SYM_CODE_END(el0_fiq)
-
-SYM_CODE_START_LOCAL(el1_error)
- kernel_entry 1
- mrs x1, esr_el1
- enable_dbg
- mov x0, sp
- bl do_serror
+ entry_handler 1, t, 64, sync
+ entry_handler 1, t, 64, irq
+ entry_handler 1, t, 64, fiq
+ entry_handler 1, t, 64, error
+
+ entry_handler 1, h, 64, sync
+ entry_handler 1, h, 64, irq
+ entry_handler 1, h, 64, fiq
+ entry_handler 1, h, 64, error
+
+ entry_handler 0, t, 64, sync
+ entry_handler 0, t, 64, irq
+ entry_handler 0, t, 64, fiq
+ entry_handler 0, t, 64, error
+
+ entry_handler 0, t, 32, sync
+ entry_handler 0, t, 32, irq
+ entry_handler 0, t, 32, fiq
+ entry_handler 0, t, 32, error
+
+SYM_CODE_START_LOCAL(ret_to_kernel)
kernel_exit 1
-SYM_CODE_END(el1_error)
-
-SYM_CODE_START_LOCAL(el0_error)
- kernel_entry 0
-el0_error_naked:
- mrs x25, esr_el1
- user_exit_irqoff
- enable_dbg
- mov x0, sp
- mov x1, x25
- bl do_serror
- enable_da
- b ret_to_user
-SYM_CODE_END(el0_error)
+SYM_CODE_END(ret_to_kernel)
/*
* "slow" syscall return path.
@@ -979,8 +766,8 @@ SYM_FUNC_START(cpu_switch_to)
mov sp, x9
msr sp_el0, x1
ptrauth_keys_install_kernel x1, x8, x9, x10
- scs_save x0, x8
- scs_load x1, x8
+ scs_save x0
+ scs_load x1
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -998,6 +785,42 @@ SYM_CODE_START(ret_from_fork)
SYM_CODE_END(ret_from_fork)
NOKPROBE(ret_from_fork)
+/*
+ * void call_on_irq_stack(struct pt_regs *regs,
+ * void (*func)(struct pt_regs *));
+ *
+ * Calls func(regs) using this CPU's irq stack and shadow irq stack.
+ */
+SYM_FUNC_START(call_on_irq_stack)
+#ifdef CONFIG_SHADOW_CALL_STACK
+ stp scs_sp, xzr, [sp, #-16]!
+ ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17
+#endif
+ /* Create a frame record to save our LR and SP (implicit in FP) */
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ ldr_this_cpu x16, irq_stack_ptr, x17
+ mov x15, #IRQ_STACK_SIZE
+ add x16, x16, x15
+
+ /* Move to the new stack and call the function there */
+ mov sp, x16
+ blr x1
+
+ /*
+ * Restore the SP from the FP, and restore the FP and LR from the frame
+ * record.
+ */
+ mov sp, x29
+ ldp x29, x30, [sp], #16
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldp scs_sp, xzr, [sp], #16
+#endif
+ ret
+SYM_FUNC_END(call_on_irq_stack)
+NOKPROBE(call_on_irq_stack)
+
#ifdef CONFIG_ARM_SDE_INTERFACE
#include <asm/sdei.h>
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index ad3dd34a83cf..e57b23f95284 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
* disabling the trap, otherwise update our in-memory copy.
*/
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
- sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1);
- sve_flush_live();
+ unsigned long vq_minus_one =
+ sve_vq_from_vl(current->thread.sve_vl) - 1;
+ sve_set_vq(vq_minus_one);
+ sve_flush_live(vq_minus_one);
fpsimd_bind_task_to_cpu();
} else {
fpsimd_to_sve(current);
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index b5d3ddaf69d9..7f467bd9db7a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -15,6 +15,7 @@
#include <asm/debug-monitors.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
+#include <asm/patching.h>
#ifdef CONFIG_DYNAMIC_FTRACE
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 96873dfa67fd..c5c994a73a64 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -16,6 +16,7 @@
#include <asm/asm_pointer_auth.h>
#include <asm/assembler.h>
#include <asm/boot.h>
+#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
@@ -117,8 +118,8 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
dmb sy // needed before dc ivac with
// MMU off
- mov x1, #0x20 // 4 x 8 bytes
- b __inval_dcache_area // tail call
+ add x1, x0, #0x20 // 4 x 8 bytes
+ b dcache_inval_poc // tail call
SYM_CODE_END(preserve_boot_args)
/*
@@ -195,7 +196,7 @@ SYM_CODE_END(preserve_boot_args)
and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
mov \istart, \ptrs
mul \istart, \istart, \count
- add \iend, \iend, \istart // iend += (count - 1) * ptrs
+ add \iend, \iend, \istart // iend += count * ptrs
// our entries span multiple tables
lsr \istart, \vstart, \shift
@@ -268,8 +269,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
*/
adrp x0, init_pg_dir
adrp x1, init_pg_end
- sub x1, x1, x0
- bl __inval_dcache_area
+ bl dcache_inval_poc
/*
* Clear the init page tables.
@@ -354,7 +354,6 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
#endif
1:
ldr_l x4, idmap_ptrs_per_pgd
- mov x5, x3 // __pa(__idmap_text_start)
adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
@@ -382,39 +381,57 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
adrp x0, idmap_pg_dir
adrp x1, idmap_pg_end
- sub x1, x1, x0
- bl __inval_dcache_area
+ bl dcache_inval_poc
adrp x0, init_pg_dir
adrp x1, init_pg_end
- sub x1, x1, x0
- bl __inval_dcache_area
+ bl dcache_inval_poc
ret x28
SYM_FUNC_END(__create_page_tables)
+ /*
+ * Initialize CPU registers with task-specific and cpu-specific context.
+ *
+ * Create a final frame record at task_pt_regs(current)->stackframe, so
+ * that the unwinder can identify the final frame record of any task by
+ * its location in the task stack. We reserve the entire pt_regs space
+ * for consistency with user tasks and kthreads.
+ */
+ .macro init_cpu_task tsk, tmp1, tmp2
+ msr sp_el0, \tsk
+
+ ldr \tmp1, [\tsk, #TSK_STACK]
+ add sp, \tmp1, #THREAD_SIZE
+ sub sp, sp, #PT_REGS_SIZE
+
+ stp xzr, xzr, [sp, #S_STACKFRAME]
+ add x29, sp, #S_STACKFRAME
+
+ scs_load \tsk
+
+ adr_l \tmp1, __per_cpu_offset
+ ldr w\tmp2, [\tsk, #TSK_CPU]
+ ldr \tmp1, [\tmp1, \tmp2, lsl #3]
+ set_this_cpu_offset \tmp1
+ .endm
+
/*
* The following fragment of code is executed with the MMU enabled.
*
* x0 = __PHYS_OFFSET
*/
SYM_FUNC_START_LOCAL(__primary_switched)
- adrp x4, init_thread_union
- add sp, x4, #THREAD_SIZE
- adr_l x5, init_task
- msr sp_el0, x5 // Save thread_info
+ adr_l x4, init_task
+ init_cpu_task x4, x5, x6
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
- stp xzr, x30, [sp, #-16]!
+ stp x29, x30, [sp, #-16]!
mov x29, sp
-#ifdef CONFIG_SHADOW_CALL_STACK
- adr_l scs_sp, init_shadow_call_stack // Set shadow call stack
-#endif
-
str_l x21, __fdt_pointer, x5 // Save FDT pointer
ldr_l x4, kimage_vaddr // Save the offset between
@@ -446,10 +463,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
0:
#endif
bl switch_to_vhe // Prefer VHE if possible
- add sp, sp, #16
- mov x29, #0
- mov x30, #0
- b start_kernel
+ ldp x29, x30, [sp], #16
+ bl start_kernel
+ ASM_BUG()
SYM_FUNC_END(__primary_switched)
.pushsection ".rodata", "a"
@@ -551,7 +567,7 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f
add x1, x1, #4
-1: str w0, [x1] // This CPU has booted in EL1
+1: str w0, [x1] // Save CPU boot mode
dmb sy
dc ivac, x1 // Invalidate potentially stale cache line
ret
@@ -632,21 +648,17 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
isb
adr_l x0, secondary_data
- ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
- cbz x1, __secondary_too_slow
- mov sp, x1
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
- msr sp_el0, x2
- scs_load x2, x3
- mov x29, #0
- mov x30, #0
+
+ init_cpu_task x2, x1, x3
#ifdef CONFIG_ARM64_PTR_AUTH
ptrauth_keys_init_cpu x2, x3, x4, x5
#endif
- b secondary_start_kernel
+ bl secondary_start_kernel
+ ASM_BUG()
SYM_FUNC_END(__secondary_switched)
SYM_FUNC_START_LOCAL(__secondary_too_slow)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 8ccca660034e..81c0186a5e32 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -45,7 +45,7 @@
* Because this code has to be copied to a 'safe' page, it can't call out to
* other functions by PC-relative address. Also remember that it may be
* mid-way through over-writing other functions. For this reason it contains
- * code from flush_icache_range() and uses the copy_page() macro.
+ * code from caches_clean_inval_pou() and uses the copy_page() macro.
*
* This 'safe' page is mapped via ttbr0, and executed from there. This function
* switches to a copy of the linear map in ttbr1, performs the restore, then
@@ -87,11 +87,12 @@ SYM_CODE_START(swsusp_arch_suspend_exit)
copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
add x1, x10, #PAGE_SIZE
- /* Clean the copied page to PoU - based on flush_icache_range() */
+ /* Clean the copied page to PoU - based on caches_clean_inval_pou() */
raw_dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x10, x3
-2: dc cvau, x4 /* clean D line / unified line */
+2: /* clean D line / unified line */
+alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
add x4, x4, x2
cmp x4, x1
b.lo 2b
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index b1cef371df2b..46a0b4d6e251 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
return -ENOMEM;
memcpy(page, src_start, length);
- __flush_icache_range((unsigned long)page, (unsigned long)page + length);
+ caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length);
rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
if (rc)
return rc;
@@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length,
return 0;
}
-#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
-
#ifdef CONFIG_ARM64_MTE
static DEFINE_XARRAY(mte_pages);
@@ -383,13 +381,18 @@ int swsusp_arch_suspend(void)
ret = swsusp_save();
} else {
/* Clean kernel core startup/idle code to PoC*/
- dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
- dcache_clean_range(__idmap_text_start, __idmap_text_end);
+ dcache_clean_inval_poc((unsigned long)__mmuoff_data_start,
+ (unsigned long)__mmuoff_data_end);
+ dcache_clean_inval_poc((unsigned long)__idmap_text_start,
+ (unsigned long)__idmap_text_end);
/* Clean kvm setup code to PoC? */
if (el2_reset_needed()) {
- dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
- dcache_clean_range(__hyp_text_start, __hyp_text_end);
+ dcache_clean_inval_poc(
+ (unsigned long)__hyp_idmap_text_start,
+ (unsigned long)__hyp_idmap_text_end);
+ dcache_clean_inval_poc((unsigned long)__hyp_text_start,
+ (unsigned long)__hyp_text_end);
}
swsusp_mte_restore_tags();
@@ -474,7 +477,8 @@ int swsusp_arch_resume(void)
* The hibernate exit text contains a set of el2 vectors, that will
* be executed at el2 with the mmu off in order to reload hyp-stub.
*/
- __flush_dcache_area(hibernate_exit, exit_size);
+ dcache_clean_inval_poc((unsigned long)hibernate_exit,
+ (unsigned long)hibernate_exit + exit_size);
/*
* KASLR will cause the el2 vectors to be in a different location in
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c
new file mode 100644
index 000000000000..a2cfbacec2bb
--- /dev/null
+++ b/arch/arm64/kernel/idle.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Low-level idle sequences
+ */
+
+#include <linux/cpu.h>
+#include <linux/irqflags.h>
+
+#include <asm/barrier.h>
+#include <asm/cpuidle.h>
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+/*
+ * cpu_do_idle()
+ *
+ * Idle the processor (wait for interrupt).
+ *
+ * If the CPU supports priority masking we must do additional work to
+ * ensure that interrupts are not masked at the PMR (because the core will
+ * not wake up if we block the wake up signal in the interrupt controller).
+ */
+void noinstr cpu_do_idle(void)
+{
+ struct arm_cpuidle_irq_context context;
+
+ arm_cpuidle_save_irq_context(&context);
+
+ dsb(sy);
+ wfi();
+
+ arm_cpuidle_restore_irq_context(&context);
+}
+
+/*
+ * This is our default idle handler.
+ */
+void noinstr arch_cpu_idle(void)
+{
+ /*
+ * This should do all the clock switching and wait for interrupt
+ * tricks
+ */
+ cpu_do_idle();
+ raw_local_irq_enable();
+}
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index e628c8ce1ffe..53a381a7f65d 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void)
for (i = 0; i < ARRAY_SIZE(regs); i++) {
if (regs[i]->override)
- __flush_dcache_area(regs[i]->override,
+ dcache_clean_inval_poc((unsigned long)regs[i]->override,
+ (unsigned long)regs[i]->override +
sizeof(*regs[i]->override));
}
}
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index bcf3c2755370..c96a9a0043bf 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -35,7 +35,7 @@ __efistub_strnlen = __pi_strnlen;
__efistub_strcmp = __pi_strcmp;
__efistub_strncmp = __pi_strncmp;
__efistub_strrchr = __pi_strrchr;
-__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc;
+__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
__efistub___memcpy = __pi_memcpy;
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 9a8a0ae1e75f..fc98037e1220 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/insn.h>
+#include <asm/patching.h>
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 341342b207f6..cfa2cfde3019 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void)
* we end up running with module randomization disabled.
*/
module_alloc_base = (u64)_etext - MODULES_VSIZE;
- __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
+ dcache_clean_inval_poc((unsigned long)&module_alloc_base,
+ (unsigned long)&module_alloc_base +
+ sizeof(module_alloc_base));
/*
* Try to map the FDT early. If this fails, we simply bail,
@@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void)
module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
module_alloc_base &= PAGE_MASK;
- __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
- __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
+ dcache_clean_inval_poc((unsigned long)&module_alloc_base,
+ (unsigned long)&module_alloc_base +
+ sizeof(module_alloc_base));
+ dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
+ (unsigned long)&memstart_offset_seed +
+ sizeof(memstart_offset_seed));
return offset;
}
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 1a157ca33262..2aede780fb80 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -17,6 +17,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
+#include <asm/patching.h>
#include <asm/traps.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 90a335c74442..03ceabe4d912 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -68,10 +68,16 @@ int machine_kexec_post_load(struct kimage *kimage)
kimage->arch.kern_reloc = __pa(reloc_code);
kexec_image_info(kimage);
- /* Flush the reloc_code in preparation for its execution. */
- __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
- flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
- arm64_relocate_new_kernel_size);
+ /*
+ * For execution with the MMU off, reloc_code needs to be cleaned to the
+ * PoC and invalidated from the I-cache.
+ */
+ dcache_clean_inval_poc((unsigned long)reloc_code,
+ (unsigned long)reloc_code +
+ arm64_relocate_new_kernel_size);
+ icache_inval_pou((uintptr_t)reloc_code,
+ (uintptr_t)reloc_code +
+ arm64_relocate_new_kernel_size);
return 0;
}
@@ -102,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage)
for (entry = &kimage->head; ; entry++) {
unsigned int flag;
- void *addr;
+ unsigned long addr;
/* flush the list entries. */
- __flush_dcache_area(entry, sizeof(kimage_entry_t));
+ dcache_clean_inval_poc((unsigned long)entry,
+ (unsigned long)entry +
+ sizeof(kimage_entry_t));
flag = *entry & IND_FLAGS;
if (flag == IND_DONE)
break;
- addr = phys_to_virt(*entry & PAGE_MASK);
+ addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);
switch (flag) {
case IND_INDIRECTION:
@@ -120,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage)
break;
case IND_SOURCE:
/* flush the source pages. */
- __flush_dcache_area(addr, PAGE_SIZE);
+ dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
break;
case IND_DESTINATION:
break;
@@ -147,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage)
kimage->segment[i].memsz,
kimage->segment[i].memsz / PAGE_SIZE);
- __flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
- kimage->segment[i].memsz);
+ dcache_clean_inval_poc(
+ (unsigned long)phys_to_virt(kimage->segment[i].mem),
+ (unsigned long)phys_to_virt(kimage->segment[i].mem) +
+ kimage->segment[i].memsz);
}
}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 125a10e413e9..69b3fde8759e 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode);
EXPORT_SYMBOL_GPL(mte_async_mode);
#endif
-static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
+static void mte_sync_page_tags(struct page *page, pte_t old_pte,
+ bool check_swap, bool pte_is_tagged)
{
- pte_t old_pte = READ_ONCE(*ptep);
-
if (check_swap && is_swap_pte(old_pte)) {
swp_entry_t entry = pte_to_swp_entry(old_pte);
@@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
return;
}
+ if (!pte_is_tagged)
+ return;
+
page_kasan_tag_reset(page);
/*
* We need smp_wmb() in between setting the flags and clearing the
@@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
mte_clear_page_tags(page_address(page));
}
-void mte_sync_tags(pte_t *ptep, pte_t pte)
+void mte_sync_tags(pte_t old_pte, pte_t pte)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
bool check_swap = nr_pages == 1;
+ bool pte_is_tagged = pte_tagged(pte);
+
+ /* Early out if there's nothing to do */
+ if (!check_swap && !pte_is_tagged)
+ return;
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
if (!test_and_set_bit(PG_mte_tagged, &page->flags))
- mte_sync_page_tags(page, ptep, check_swap);
+ mte_sync_page_tags(page, old_pte, check_swap,
+ pte_is_tagged);
}
}
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
new file mode 100644
index 000000000000..771f543464e0
--- /dev/null
+++ b/arch/arm64/kernel/patching.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+#include <asm/insn.h>
+#include <asm/kprobes.h>
+#include <asm/patching.h>
+#include <asm/sections.h>
+
+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+static bool is_exit_text(unsigned long addr)
+{
+ /* discarded with init text/data */
+ return system_state < SYSTEM_RUNNING &&
+ addr >= (unsigned long)__exittext_begin &&
+ addr < (unsigned long)__exittext_end;
+}
+
+static bool is_image_text(unsigned long addr)
+{
+ return core_kernel_text(addr) || is_exit_text(addr);
+}
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+ unsigned long uintaddr = (uintptr_t) addr;
+ bool image = is_image_text(uintaddr);
+ struct page *page;
+
+ if (image)
+ page = phys_to_page(__pa_symbol(addr));
+ else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ page = vmalloc_to_page(addr);
+ else
+ return addr;
+
+ BUG_ON(!page);
+ return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
+ (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+ clear_fixmap(fixmap);
+}
+/*
+ * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
+ * little-endian.
+ */
+int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
+{
+ int ret;
+ __le32 val;
+
+ ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE);
+ if (!ret)
+ *insnp = le32_to_cpu(val);
+
+ return ret;
+}
+
+static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
+{
+ void *waddr = addr;
+ unsigned long flags = 0;
+ int ret;
+
+ raw_spin_lock_irqsave(&patch_lock, flags);
+ waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+ ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
+
+ patch_unmap(FIX_TEXT_POKE0);
+ raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+ return ret;
+}
+
+int __kprobes aarch64_insn_write(void *addr, u32 insn)
+{
+ return __aarch64_insn_write(addr, cpu_to_le32(insn));
+}
+
+int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
+{
+ u32 *tp = addr;
+ int ret;
+
+ /* A64 instructions must be word aligned */
+ if ((uintptr_t)tp & 0x3)
+ return -EINVAL;
+
+ ret = aarch64_insn_write(tp, insn);
+ if (ret == 0)
+ caches_clean_inval_pou((uintptr_t)tp,
+ (uintptr_t)tp + AARCH64_INSN_SIZE);
+
+ return ret;
+}
+
+struct aarch64_insn_patch {
+ void **text_addrs;
+ u32 *new_insns;
+ int insn_cnt;
+ atomic_t cpu_count;
+};
+
+static int __kprobes aarch64_insn_patch_text_cb(void *arg)
+{
+ int i, ret = 0;
+ struct aarch64_insn_patch *pp = arg;
+
+ /* The first CPU becomes master */
+ if (atomic_inc_return(&pp->cpu_count) == 1) {
+ for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
+ ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+ pp->new_insns[i]);
+ /* Notify other processors with an additional increment. */
+ atomic_inc(&pp->cpu_count);
+ } else {
+ while (atomic_read(&pp->cpu_count) <= num_online_cpus())
+ cpu_relax();
+ isb();
+ }
+
+ return ret;
+}
+
+int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
+{
+ struct aarch64_insn_patch patch = {
+ .text_addrs = addrs,
+ .new_insns = insns,
+ .insn_cnt = cnt,
+ .cpu_count = ATOMIC_INIT(0),
+ };
+
+ if (cnt <= 0)
+ return -EINVAL;
+
+ return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
+ cpu_online_mask);
+}
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 88ff471b0bce..4a72c2727309 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -116,7 +116,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
tail = (struct frame_tail __user *)regs->regs[29];
while (entry->nr < entry->max_stack &&
- tail && !((unsigned long)tail & 0xf))
+ tail && !((unsigned long)tail & 0x7))
tail = user_backtrace(tail, entry);
} else {
#ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f594957e29bd..d07788dad388 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -165,10 +165,7 @@ armv8pmu_events_sysfs_show(struct device *dev,
}
#define ARMV8_EVENT_ATTR(name, config) \
- (&((struct perf_pmu_events_attr) { \
- .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \
- .id = config, \
- }).attr.attr)
+ PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config)
static struct attribute *armv8_pmuv3_event_attrs[] = {
ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
@@ -312,13 +309,46 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
- return snprintf(page, PAGE_SIZE, "0x%08x\n", slots);
+ return sysfs_emit(page, "0x%08x\n", slots);
}
static DEVICE_ATTR_RO(slots);
+static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+ u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT)
+ & ARMV8_PMU_BUS_SLOTS_MASK;
+
+ return sysfs_emit(page, "0x%08x\n", bus_slots);
+}
+
+static DEVICE_ATTR_RO(bus_slots);
+
+static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+ u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT)
+ & ARMV8_PMU_BUS_WIDTH_MASK;
+ u32 val = 0;
+
+ /* Encoded as Log2(number of bytes), plus one */
+ if (bus_width > 2 && bus_width < 13)
+ val = 1 << (bus_width - 1);
+
+ return sysfs_emit(page, "0x%08x\n", val);
+}
+
+static DEVICE_ATTR_RO(bus_width);
+
static struct attribute *armv8_pmuv3_caps_attrs[] = {
&dev_attr_slots.attr,
+ &dev_attr_bus_slots.attr,
+ &dev_attr_bus_width.attr,
NULL,
};
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d607c9912025..6dbcc89f6662 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -7,26 +7,28 @@
* Copyright (C) 2013 Linaro Limited.
* Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
*/
+#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
-#include <linux/extable.h>
-#include <linux/slab.h>
-#include <linux/stop_machine.h>
#include <linux/sched/debug.h>
#include <linux/set_memory.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
#include <linux/stringify.h>
+#include <linux/uaccess.h>
#include <linux/vmalloc.h>
-#include <asm/traps.h>
-#include <asm/ptrace.h>
+
#include <asm/cacheflush.h>
-#include <asm/debug-monitors.h>
#include <asm/daifflags.h>
-#include <asm/system_misc.h>
+#include <asm/debug-monitors.h>
#include <asm/insn.h>
-#include <linux/uaccess.h>
#include <asm/irq.h>
+#include <asm/patching.h>
+#include <asm/ptrace.h>
#include <asm/sections.h>
+#include <asm/system_misc.h>
+#include <asm/traps.h>
#include "decode-insn.h"
@@ -277,23 +279,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 25f67ec59635..22d0b3252476 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -10,6 +10,7 @@
#include <linux/kprobes.h>
#include <asm/ptrace.h>
+#include <asm/traps.h>
#include "simulate-insn.h"
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 2c247634552b..9be668f3f034 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
memcpy(dst, src, len);
/* flush caches (dcache/icache) */
- sync_icache_aliases(dst, len);
+ sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
kunmap_atomic(xol_page_kaddr);
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2c..5ba0ed036dee 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -18,7 +18,6 @@
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
-#include <linux/lockdep.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/nospec.h>
@@ -46,7 +45,6 @@
#include <linux/prctl.h>
#include <asm/alternative.h>
-#include <asm/arch_gicv3.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
@@ -74,63 +72,6 @@ EXPORT_SYMBOL_GPL(pm_power_off);
void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
-static void noinstr __cpu_do_idle(void)
-{
- dsb(sy);
- wfi();
-}
-
-static void noinstr __cpu_do_idle_irqprio(void)
-{
- unsigned long pmr;
- unsigned long daif_bits;
-
- daif_bits = read_sysreg(daif);
- write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif);
-
- /*
- * Unmask PMR before going idle to make sure interrupts can
- * be raised.
- */
- pmr = gic_read_pmr();
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
- __cpu_do_idle();
-
- gic_write_pmr(pmr);
- write_sysreg(daif_bits, daif);
-}
-
-/*
- * cpu_do_idle()
- *
- * Idle the processor (wait for interrupt).
- *
- * If the CPU supports priority masking we must do additional work to
- * ensure that interrupts are not masked at the PMR (because the core will
- * not wake up if we block the wake up signal in the interrupt controller).
- */
-void noinstr cpu_do_idle(void)
-{
- if (system_uses_irq_prio_masking())
- __cpu_do_idle_irqprio();
- else
- __cpu_do_idle();
-}
-
-/*
- * This is our default idle handler.
- */
-void noinstr arch_cpu_idle(void)
-{
- /*
- * This should do all the clock switching and wait for interrupt
- * tricks
- */
- cpu_do_idle();
- raw_local_irq_enable();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
@@ -435,6 +376,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
+ /*
+ * For the benefit of the unwinder, set up childregs->stackframe
+ * as the final frame for the new task.
+ */
+ p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
ptrace_hw_copy_thread(p);
@@ -527,6 +473,15 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
write_sysreg(val, cntkctl_el1);
}
+static void compat_thread_switch(struct task_struct *next)
+{
+ if (!is_compat_thread(task_thread_info(next)))
+ return;
+
+ if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+ set_tsk_thread_flag(next, TIF_NOTIFY_RESUME);
+}
+
static void update_sctlr_el1(u64 sctlr)
{
/*
@@ -568,6 +523,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
ssbs_thread_switch(next);
erratum_1418040_thread_switch(prev, next);
ptrauth_thread_switch_user(next);
+ compat_thread_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -598,7 +554,7 @@ unsigned long get_wchan(struct task_struct *p)
struct stackframe frame;
unsigned long stack_page, ret = 0;
int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
+ if (!p || p == current || task_is_running(p))
return 0;
stack_page = (unsigned long)try_get_task_stack(p);
@@ -633,8 +589,15 @@ unsigned long arch_align_stack(unsigned long sp)
*/
void arch_setup_new_exec(void)
{
- current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
+ unsigned long mmflags = 0;
+
+ if (is_compat_task()) {
+ mmflags = MMCF_AARCH32;
+ if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+ set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+ }
+ current->mm->context.flags = mmflags;
ptrauth_thread_init_user();
mte_thread_init_user();
@@ -724,22 +687,6 @@ static int __init tagged_addr_init(void)
core_initcall(tagged_addr_init);
#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
-asmlinkage void __sched arm64_preempt_schedule_irq(void)
-{
- lockdep_assert_irqs_disabled();
-
- /*
- * Preempting a task from an IRQ means we leave copies of PSTATE
- * on the stack. cpufeature's enable calls may modify PSTATE, but
- * resuming one of these preempted tasks would undo those changes.
- *
- * Only allow a task to be preempted once cpufeatures have been
- * enabled.
- */
- if (system_capabilities_finalized())
- preempt_schedule_irq();
-}
-
#ifdef CONFIG_BINFMT_ELF
int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
bool has_interp, bool is_interp)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index eb2f73939b7b..499b6b2f9757 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -122,7 +122,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
{
return ((addr & ~(THREAD_SIZE - 1)) ==
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
- on_irq_stack(addr, NULL);
+ on_irq_stack(addr, sizeof(unsigned long), NULL);
}
/**
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 2c7ca449dd51..47f77d1234cb 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -162,31 +162,33 @@ static int init_sdei_scs(void)
return err;
}
-static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
+static bool on_sdei_normal_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info)
{
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
- return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info);
+ return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info);
}
-static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
+static bool on_sdei_critical_stack(unsigned long sp, unsigned long size,
+ struct stack_info *info)
{
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
unsigned long high = low + SDEI_STACK_SIZE;
- return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info);
+ return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info);
}
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
+bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info)
{
if (!IS_ENABLED(CONFIG_VMAP_STACK))
return false;
- if (on_sdei_critical_stack(sp, info))
+ if (on_sdei_critical_stack(sp, size, info))
return true;
- if (on_sdei_normal_stack(sp, info))
+ if (on_sdei_normal_stack(sp, size, info))
return true;
return false;
@@ -231,13 +233,13 @@ out_err:
}
/*
- * __sdei_handler() returns one of:
+ * do_sdei_event() returns one of:
* SDEI_EV_HANDLED - success, return to the interrupted context.
* SDEI_EV_FAILED - failure, return this error code to firmare.
* virtual-address - success, return to this address.
*/
-static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
- struct sdei_registered_event *arg)
+unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
+ struct sdei_registered_event *arg)
{
u32 mode;
int i, err = 0;
@@ -292,45 +294,3 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
return vbar + 0x480;
}
-
-static void __kprobes notrace __sdei_pstate_entry(void)
-{
- /*
- * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
- * whether PSTATE bits are inherited unchanged or generated from
- * scratch, and the TF-A implementation always clears PAN and always
- * clears UAO. There are no other known implementations.
- *
- * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
- * PSTATE is modified upon architectural exceptions, and so PAN is
- * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
- * cleared.
- *
- * We must explicitly reset PAN to the expected state, including
- * clearing it when the host isn't using it, in case a VM had it set.
- */
- if (system_uses_hw_pan())
- set_pstate_pan(1);
- else if (cpu_has_pan())
- set_pstate_pan(0);
-}
-
-asmlinkage noinstr unsigned long
-__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
-{
- unsigned long ret;
-
- /*
- * We didn't take an exception to get here, so the HW hasn't
- * set/cleared bits in PSTATE that we may rely on. Initialize PAN.
- */
- __sdei_pstate_entry();
-
- arm64_enter_nmi(regs);
-
- ret = _sdei_handler(regs, arg);
-
- arm64_exit_nmi(regs);
-
- return ret;
-}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 787bc0f601b3..880f40bae60e 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -88,12 +88,6 @@ void __init smp_setup_processor_id(void)
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
set_cpu_logical_map(0, mpidr);
- /*
- * clear __my_cpu_offset on boot CPU to avoid hang caused by
- * using percpu variable early, for example, lockdep will
- * access percpu variable inside lock_release
- */
- set_my_cpu_offset(0);
pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
(unsigned long)mpidr, read_cpuid_id());
}
@@ -382,7 +376,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
* faults in case uaccess_enable() is inadvertently called by the init
* thread.
*/
- init_task.thread_info.ttbr0 = __pa_symbol(reserved_pg_dir);
+ init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
#endif
if (boot_args[1] || boot_args[2] || boot_args[3]) {
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 6237486ff6bb..f8192f4ae0b8 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -911,6 +911,19 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}
+static bool cpu_affinity_invalid(struct pt_regs *regs)
+{
+ if (!compat_user_mode(regs))
+ return false;
+
+ /*
+ * We're preemptible, but a reschedule will cause us to check the
+ * affinity again.
+ */
+ return !cpumask_test_cpu(raw_smp_processor_id(),
+ system_32bit_el0_cpumask());
+}
+
asmlinkage void do_notify_resume(struct pt_regs *regs,
unsigned long thread_flags)
{
@@ -938,6 +951,19 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
if (thread_flags & _TIF_NOTIFY_RESUME) {
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
+
+ /*
+ * If we reschedule after checking the affinity
+ * then we must ensure that TIF_NOTIFY_RESUME
+ * is set so that we check the affinity again.
+ * Since tracehook_notify_resume() clears the
+ * flag, ensure that the compiler doesn't move
+ * it after the affinity check.
+ */
+ barrier();
+
+ if (cpu_affinity_invalid(regs))
+ force_sig(SIGKILL);
}
if (thread_flags & _TIF_FOREIGN_FPSTATE)
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index d62447964ed9..d3d37f932b97 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -7,8 +7,34 @@
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
+#include <asm/thread_info.h>
+
+/*
+ * If we have SMCCC v1.3 and (as is likely) no SVE state in
+ * the registers then set the SMCCC hint bit to say there's no
+ * need to preserve it. Do this by directly adjusting the SMCCC
+ * function value which is already stored in x0 ready to be called.
+ */
+SYM_FUNC_START(__arm_smccc_sve_check)
+
+ ldr_l x16, smccc_has_sve_hint
+ cbz x16, 2f
+
+ get_current_task x16
+ ldr x16, [x16, #TSK_TI_FLAGS]
+ tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
+ tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
+
+1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
+
+2: ret
+SYM_FUNC_END(__arm_smccc_sve_check)
+EXPORT_SYMBOL(__arm_smccc_sve_check)
.macro SMCCC instr
+alternative_if ARM64_SVE
+ bl __arm_smccc_sve_check
+alternative_else_nop_endif
\instr #0
ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
@@ -43,3 +69,60 @@ SYM_FUNC_START(__arm_smccc_hvc)
SMCCC hvc
SYM_FUNC_END(__arm_smccc_hvc)
EXPORT_SYMBOL(__arm_smccc_hvc)
+
+ .macro SMCCC_1_2 instr
+ /* Save `res` and free a GPR that won't be clobbered */
+ stp x1, x19, [sp, #-16]!
+
+ /* Ensure `args` won't be clobbered while loading regs in next step */
+ mov x19, x0
+
+ /* Load the registers x0 - x17 from the struct arm_smccc_1_2_regs */
+ ldp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS]
+ ldp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS]
+ ldp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS]
+ ldp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS]
+ ldp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS]
+ ldp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS]
+ ldp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS]
+ ldp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS]
+ ldp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS]
+
+ \instr #0
+
+ /* Load the `res` from the stack */
+ ldr x19, [sp]
+
+ /* Store the registers x0 - x17 into the result structure */
+ stp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS]
+ stp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS]
+ stp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS]
+ stp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS]
+ stp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS]
+ stp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS]
+ stp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS]
+ stp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS]
+ stp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS]
+
+ /* Restore original x19 */
+ ldp xzr, x19, [sp], #16
+ ret
+.endm
+
+/*
+ * void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args,
+ * struct arm_smccc_1_2_regs *res);
+ */
+SYM_FUNC_START(arm_smccc_1_2_hvc)
+ SMCCC_1_2 hvc
+SYM_FUNC_END(arm_smccc_1_2_hvc)
+EXPORT_SYMBOL(arm_smccc_1_2_hvc)
+
+/*
+ * void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args,
+ * struct arm_smccc_1_2_regs *res);
+ */
+SYM_FUNC_START(arm_smccc_1_2_smc)
+ SMCCC_1_2 smc
+SYM_FUNC_END(arm_smccc_1_2_smc)
+EXPORT_SYMBOL(arm_smccc_1_2_smc)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041b2b07..6f6ff072acbd 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -120,9 +120,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* page tables.
*/
secondary_data.task = idle;
- secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
update_cpu_boot_status(CPU_MMU_OFF);
- __flush_dcache_area(&secondary_data, sizeof(secondary_data));
/* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
@@ -142,8 +140,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_crit("CPU%u: failed to come online\n", cpu);
secondary_data.task = NULL;
- secondary_data.stack = NULL;
- __flush_dcache_area(&secondary_data, sizeof(secondary_data));
status = READ_ONCE(secondary_data.status);
if (status == CPU_MMU_OFF)
status = READ_ONCE(__early_cpu_boot_status);
@@ -202,10 +198,7 @@ asmlinkage notrace void secondary_start_kernel(void)
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
struct mm_struct *mm = &init_mm;
const struct cpu_operations *ops;
- unsigned int cpu;
-
- cpu = task_cpu(current);
- set_my_cpu_offset(per_cpu_offset(cpu));
+ unsigned int cpu = smp_processor_id();
/*
* All kernel threads share the same mm context; grab a
@@ -224,7 +217,6 @@ asmlinkage notrace void secondary_start_kernel(void)
init_gic_priority_masking();
rcu_cpu_starting(cpu);
- preempt_disable();
trace_hardirqs_off();
/*
@@ -352,7 +344,7 @@ void __cpu_die(unsigned int cpu)
pr_crit("CPU%u: cpu didn't die\n", cpu);
return;
}
- pr_notice("CPU%u: shutdown\n", cpu);
+ pr_debug("CPU%u: shutdown\n", cpu);
/*
* Now that the dying CPU is beyond the point of no return w.r.t.
@@ -452,6 +444,11 @@ void __init smp_cpus_done(unsigned int max_cpus)
void __init smp_prepare_boot_cpu(void)
{
+ /*
+ * The runtime per-cpu areas have been allocated by
+ * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be
+ * freed shortly, so we must move over to the runtime per-cpu area.
+ */
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
cpuinfo_store_boot_cpu();
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index c45a83512805..7e1624ecab3c 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -36,7 +36,7 @@ static void write_pen_release(u64 val)
unsigned long size = sizeof(secondary_holding_pen_release);
secondary_holding_pen_release = val;
- __flush_dcache_area(start, size);
+ dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size);
}
@@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
* the boot protocol.
*/
writeq_relaxed(pa_holding_pen, release_addr);
- __flush_dcache_area((__force void *)release_addr,
- sizeof(*release_addr));
+ dcache_clean_inval_poc((__force unsigned long)release_addr,
+ (__force unsigned long)release_addr +
+ sizeof(*release_addr));
/*
* Send an event to wake up the secondary CPU.
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index de07147a7926..b189de5ca6cb 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -68,13 +68,17 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
unsigned long fp = frame->fp;
struct stack_info info;
- if (fp & 0xf)
- return -EINVAL;
-
if (!tsk)
tsk = current;
- if (!on_accessible_stack(tsk, fp, &info))
+ /* Final frame; nothing to unwind */
+ if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
+ return -ENOENT;
+
+ if (fp & 0x7)
+ return -EINVAL;
+
+ if (!on_accessible_stack(tsk, fp, 16, &info))
return -EINVAL;
if (test_bit(info.type, frame->stacks_done))
@@ -128,12 +132,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
frame->pc = ptrauth_strip_insn_pac(frame->pc);
- /*
- * This is a terminal record, so we have finished unwinding.
- */
- if (!frame->fp && !frame->pc)
- return -ENOENT;
-
return 0;
}
NOKPROBE_SYMBOL(unwind_frame);
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index e3f72df9509d..938ce6fbee8a 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -7,6 +7,7 @@
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
+#include <asm/cpuidle.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/exec.h>
@@ -91,6 +92,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
int ret = 0;
unsigned long flags;
struct sleep_stack_data state;
+ struct arm_cpuidle_irq_context context;
/* Report any MTE async fault before going to suspend */
mte_suspend_enter();
@@ -103,12 +105,18 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
flags = local_daif_save();
/*
- * Function graph tracer state gets incosistent when the kernel
+ * Function graph tracer state gets inconsistent when the kernel
* calls functions that never return (aka suspend finishers) hence
* disable graph tracing during their execution.
*/
pause_graph_tracing();
+ /*
+ * Switch to using DAIF.IF instead of PMR in order to reliably
+ * resume if we're using pseudo-NMIs.
+ */
+ arm_cpuidle_save_irq_context(&context);
+
if (__cpu_suspend_enter(&state)) {
/* Call the suspend finisher */
ret = fn(arg);
@@ -126,6 +134,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
RCU_NONIDLE(__cpu_suspend_exit());
}
+ arm_cpuidle_restore_irq_context(&context);
+
unpause_graph_tracing();
/*
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 265fe3eb1069..db5159a3055f 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
dsb(ish);
}
- ret = __flush_cache_user_range(start, start + chunk);
+ ret = caches_clean_inval_user_pou(start, start + chunk);
if (ret)
return ret;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index a05d34f0e82a..b03e383d944a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
#include <asm/extable.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
+#include <asm/patching.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
@@ -45,11 +46,102 @@
#include <asm/system_misc.h>
#include <asm/sysreg.h>
-static const char *handler[] = {
- "Synchronous Abort",
- "IRQ",
- "FIQ",
- "Error"
+static bool __kprobes __check_eq(unsigned long pstate)
+{
+ return (pstate & PSR_Z_BIT) != 0;
+}
+
+static bool __kprobes __check_ne(unsigned long pstate)
+{
+ return (pstate & PSR_Z_BIT) == 0;
+}
+
+static bool __kprobes __check_cs(unsigned long pstate)
+{
+ return (pstate & PSR_C_BIT) != 0;
+}
+
+static bool __kprobes __check_cc(unsigned long pstate)
+{
+ return (pstate & PSR_C_BIT) == 0;
+}
+
+static bool __kprobes __check_mi(unsigned long pstate)
+{
+ return (pstate & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_pl(unsigned long pstate)
+{
+ return (pstate & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_vs(unsigned long pstate)
+{
+ return (pstate & PSR_V_BIT) != 0;
+}
+
+static bool __kprobes __check_vc(unsigned long pstate)
+{
+ return (pstate & PSR_V_BIT) == 0;
+}
+
+static bool __kprobes __check_hi(unsigned long pstate)
+{
+ pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+ return (pstate & PSR_C_BIT) != 0;
+}
+
+static bool __kprobes __check_ls(unsigned long pstate)
+{
+ pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+ return (pstate & PSR_C_BIT) == 0;
+}
+
+static bool __kprobes __check_ge(unsigned long pstate)
+{
+ pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+ return (pstate & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_lt(unsigned long pstate)
+{
+ pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+ return (pstate & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_gt(unsigned long pstate)
+{
+ /*PSR_N_BIT ^= PSR_V_BIT */
+ unsigned long temp = pstate ^ (pstate << 3);
+
+ temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
+ return (temp & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_le(unsigned long pstate)
+{
+ /*PSR_N_BIT ^= PSR_V_BIT */
+ unsigned long temp = pstate ^ (pstate << 3);
+
+ temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
+ return (temp & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_al(unsigned long pstate)
+{
+ return true;
+}
+
+/*
+ * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
+ * it behaves identically to 0b1110 ("al").
+ */
+pstate_check_t * const aarch32_opcode_cond_checks[16] = {
+ __check_eq, __check_ne, __check_cs, __check_cc,
+ __check_mi, __check_pl, __check_vs, __check_vc,
+ __check_hi, __check_ls, __check_ge, __check_lt,
+ __check_gt, __check_le, __check_al, __check_al
};
int show_unhandled_signals = 0;
@@ -751,27 +843,8 @@ const char *esr_get_class_string(u32 esr)
}
/*
- * bad_mode handles the impossible case in the exception vector. This is always
- * fatal.
- */
-asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
-{
- arm64_enter_nmi(regs);
-
- console_verbose();
-
- pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
- handler[reason], smp_processor_id(), esr,
- esr_get_class_string(esr));
-
- __show_regs(regs);
- local_daif_mask();
- panic("bad mode");
-}
-
-/*
* bad_el0_sync handles unexpected, but potentially recoverable synchronous
- * exceptions taken from EL0. Unlike bad_mode, this returns.
+ * exceptions taken from EL0.
*/
void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
{
@@ -789,15 +862,11 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
-asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
+void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
{
unsigned long tsk_stk = (unsigned long)current->stack;
unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
- unsigned int esr = read_sysreg(esr_el1);
- unsigned long far = read_sysreg(far_el1);
-
- arm64_enter_nmi(regs);
console_verbose();
pr_emerg("Insufficient stack space to handle exception!");
@@ -870,15 +939,11 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
}
}
-asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr)
+void do_serror(struct pt_regs *regs, unsigned int esr)
{
- arm64_enter_nmi(regs);
-
/* non-RAS errors are not containable */
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
arm64_serror_panic(regs, esr);
-
- arm64_exit_nmi(regs);
}
/* GENERIC_BUG traps */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 3964acf5451e..a4eba0908bfa 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -20,8 +20,6 @@ if VIRTUALIZATION
menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
- # for TASKSTATS/TASK_DELAY_ACCT:
- depends on NET && MULTIUSER
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -38,8 +36,7 @@ menuconfig KVM
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
- select TASKSTATS
- select TASK_DELAY_ACCT
+ select SCHED_INFO
help
Support hosting virtualized guest machines.
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 589921392cb1..989bb5dad2c8 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM) += hyp/
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
- $(KVM)/vfio.o $(KVM)/irqchip.o \
+ $(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 74e0699661e9..3df67c127489 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -9,6 +9,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/uaccess.h>
#include <clocksource/arm_arch_timer.h>
@@ -973,36 +974,154 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
return 0;
}
-int kvm_timer_hyp_init(bool has_gic)
+static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
- struct arch_timer_kvm_info *info;
- int err;
+ if (vcpu)
+ irqd_set_forwarded_to_vcpu(d);
+ else
+ irqd_clr_forwarded_to_vcpu(d);
- info = arch_timer_get_kvm_info();
- timecounter = &info->timecounter;
+ return 0;
+}
- if (!timecounter->cc) {
- kvm_err("kvm_arch_timer: uninitialized timecounter\n");
- return -ENODEV;
+static int timer_irq_set_irqchip_state(struct irq_data *d,
+ enum irqchip_irq_state which, bool val)
+{
+ if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
+ return irq_chip_set_parent_state(d, which, val);
+
+ if (val)
+ irq_chip_mask_parent(d);
+ else
+ irq_chip_unmask_parent(d);
+
+ return 0;
+}
+
+static void timer_irq_eoi(struct irq_data *d)
+{
+ if (!irqd_is_forwarded_to_vcpu(d))
+ irq_chip_eoi_parent(d);
+}
+
+static void timer_irq_ack(struct irq_data *d)
+{
+ d = d->parent_data;
+ if (d->chip->irq_ack)
+ d->chip->irq_ack(d);
+}
+
+static struct irq_chip timer_chip = {
+ .name = "KVM",
+ .irq_ack = timer_irq_ack,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = timer_irq_eoi,
+ .irq_set_type = irq_chip_set_type_parent,
+ .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity,
+ .irq_set_irqchip_state = timer_irq_set_irqchip_state,
+};
+
+static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ irq_hw_number_t hwirq = (uintptr_t)arg;
+
+ return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+ &timer_chip, NULL);
+}
+
+static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+}
+
+static const struct irq_domain_ops timer_domain_ops = {
+ .alloc = timer_irq_domain_alloc,
+ .free = timer_irq_domain_free,
+};
+
+static struct irq_ops arch_timer_irq_ops = {
+ .get_input_level = kvm_arch_timer_get_input_level,
+};
+
+static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
+{
+ *flags = irq_get_trigger_type(virq);
+ if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
+ kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
+ virq);
+ *flags = IRQF_TRIGGER_LOW;
}
+}
- /* First, do the virtual EL1 timer irq */
+static int kvm_irq_init(struct arch_timer_kvm_info *info)
+{
+ struct irq_domain *domain = NULL;
if (info->virtual_irq <= 0) {
kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
info->virtual_irq);
return -ENODEV;
}
+
host_vtimer_irq = info->virtual_irq;
+ kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
+
+ if (kvm_vgic_global_state.no_hw_deactivation) {
+ struct fwnode_handle *fwnode;
+ struct irq_data *data;
+
+ fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
+ if (!fwnode)
+ return -ENOMEM;
+
+ /* Assume both vtimer and ptimer in the same parent */
+ data = irq_get_irq_data(host_vtimer_irq);
+ domain = irq_domain_create_hierarchy(data->domain, 0,
+ NR_KVM_TIMERS, fwnode,
+ &timer_domain_ops, NULL);
+ if (!domain) {
+ irq_domain_free_fwnode(fwnode);
+ return -ENOMEM;
+ }
+
+ arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+ WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
+ (void *)TIMER_VTIMER));
+ }
- host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
- if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
- host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
- kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
- host_vtimer_irq);
- host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+ if (info->physical_irq > 0) {
+ host_ptimer_irq = info->physical_irq;
+ kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
+
+ if (domain)
+ WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
+ (void *)TIMER_PTIMER));
}
+ return 0;
+}
+
+int kvm_timer_hyp_init(bool has_gic)
+{
+ struct arch_timer_kvm_info *info;
+ int err;
+
+ info = arch_timer_get_kvm_info();
+ timecounter = &info->timecounter;
+
+ if (!timecounter->cc) {
+ kvm_err("kvm_arch_timer: uninitialized timecounter\n");
+ return -ENODEV;
+ }
+
+ err = kvm_irq_init(info);
+ if (err)
+ return err;
+
+ /* First, do the virtual EL1 timer irq */
+
err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
"kvm guest vtimer", kvm_get_running_vcpus());
if (err) {
@@ -1027,15 +1146,6 @@ int kvm_timer_hyp_init(bool has_gic)
/* Now let's do the physical EL1 timer irq */
if (info->physical_irq > 0) {
- host_ptimer_irq = info->physical_irq;
- host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
- if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
- host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
- kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
- host_ptimer_irq);
- host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
- }
-
err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
"kvm guest ptimer", kvm_get_running_vcpus());
if (err) {
@@ -1143,7 +1253,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_vtimer->host_timer_irq,
map.direct_vtimer->irq.irq,
- kvm_arch_timer_get_input_level);
+ &arch_timer_irq_ops);
if (ret)
return ret;
@@ -1151,7 +1261,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_ptimer->host_timer_irq,
map.direct_ptimer->irq.irq,
- kvm_arch_timer_get_input_level);
+ &arch_timer_irq_ops);
}
if (ret)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e720148232a0..e9a2b8f27792 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
kvm->arch.return_nisv_io_abort_to_user = true;
break;
+ case KVM_CAP_ARM_MTE:
+ if (!system_supports_mte() || kvm->created_vcpus)
+ return -EINVAL;
+ r = 0;
+ kvm->arch.mte_enabled = true;
+ break;
default:
r = -EINVAL;
break;
@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = 1;
break;
+ case KVM_CAP_ARM_MTE:
+ r = system_supports_mte();
+ break;
case KVM_CAP_STEAL_TIME:
r = kvm_arm_pvtime_supported();
break;
@@ -689,9 +698,22 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
vgic_v4_load(vcpu);
preempt_enable();
}
+
+ if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
+ kvm_pmu_handle_pmcr(vcpu,
+ __vcpu_sys_reg(vcpu, PMCR_EL0));
}
}
+static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
+{
+ if (likely(!vcpu_mode_is_32bit(vcpu)))
+ return false;
+
+ return !system_supports_32bit_el0() ||
+ static_branch_unlikely(&arm64_mismatched_32bit_el0);
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -877,7 +899,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* with the asymmetric AArch32 case), return to userspace with
* a fatal error.
*/
- if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) {
+ if (vcpu_mode_is_bad_32bit(vcpu)) {
/*
* As we have caught the guest red-handed, decide that
* it isn't fit for purpose anymore by making the vcpu
@@ -1078,7 +1100,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
stage2_unmap_vm(vcpu->kvm);
else
- __flush_icache_all();
+ icache_inval_all_pou();
}
vcpu_reset_hcr(vcpu);
@@ -1350,6 +1372,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
return 0;
}
+ case KVM_ARM_MTE_COPY_TAGS: {
+ struct kvm_arm_copy_mte_tags copy_tags;
+
+ if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
+ return -EFAULT;
+ return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
+ }
default:
return -EINVAL;
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5cb4a1cd5603..1dfb83578277 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -28,20 +28,40 @@
#include "trace.h"
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- VCPU_STAT("halt_successful_poll", halt_successful_poll),
- VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
- VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
- VCPU_STAT("halt_wakeup", halt_wakeup),
- VCPU_STAT("hvc_exit_stat", hvc_exit_stat),
- VCPU_STAT("wfe_exit_stat", wfe_exit_stat),
- VCPU_STAT("wfi_exit_stat", wfi_exit_stat),
- VCPU_STAT("mmio_exit_user", mmio_exit_user),
- VCPU_STAT("mmio_exit_kernel", mmio_exit_kernel),
- VCPU_STAT("exits", exits),
- VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
- VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
- { NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS()
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+ sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
+ STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
+ STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
+ STATS_DESC_COUNTER(VCPU, mmio_exit_user),
+ STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
+ STATS_DESC_COUNTER(VCPU, exits)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+ sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
};
static bool core_reg_offset_is_vreg(u64 off)
@@ -995,3 +1015,89 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
return ret;
}
+
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags)
+{
+ gpa_t guest_ipa = copy_tags->guest_ipa;
+ size_t length = copy_tags->length;
+ void __user *tags = copy_tags->addr;
+ gpa_t gfn;
+ bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
+ int ret = 0;
+
+ if (!kvm_has_mte(kvm))
+ return -EINVAL;
+
+ if (copy_tags->reserved[0] || copy_tags->reserved[1])
+ return -EINVAL;
+
+ if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
+ return -EINVAL;
+
+ if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
+ return -EINVAL;
+
+ gfn = gpa_to_gfn(guest_ipa);
+
+ mutex_lock(&kvm->slots_lock);
+
+ while (length > 0) {
+ kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+ void *maddr;
+ unsigned long num_tags;
+ struct page *page;
+
+ if (is_error_noslot_pfn(pfn)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ page = pfn_to_online_page(pfn);
+ if (!page) {
+ /* Reject ZONE_DEVICE memory */
+ ret = -EFAULT;
+ goto out;
+ }
+ maddr = page_address(page);
+
+ if (!write) {
+ if (test_bit(PG_mte_tagged, &page->flags))
+ num_tags = mte_copy_tags_to_user(tags, maddr,
+ MTE_GRANULES_PER_PAGE);
+ else
+ /* No tags in memory, so write zeros */
+ num_tags = MTE_GRANULES_PER_PAGE -
+ clear_user(tags, MTE_GRANULES_PER_PAGE);
+ kvm_release_pfn_clean(pfn);
+ } else {
+ num_tags = mte_copy_tags_from_user(maddr, tags,
+ MTE_GRANULES_PER_PAGE);
+
+ /*
+ * Set the flag after checking the write
+ * completed fully
+ */
+ if (num_tags == MTE_GRANULES_PER_PAGE)
+ set_bit(PG_mte_tagged, &page->flags);
+
+ kvm_release_pfn_dirty(pfn);
+ }
+
+ if (num_tags != MTE_GRANULES_PER_PAGE) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ gfn++;
+ tags += num_tags;
+ length -= PAGE_SIZE;
+ }
+
+out:
+ mutex_unlock(&kvm->slots_lock);
+ /* If some data has been copied report the number of bytes copied */
+ if (length != copy_tags->length)
+ return copy_tags->length - length;
+ return ret;
+}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e831d3dfd50d..435346ea1504 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -13,6 +13,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
#include <asm/kvm_ptrauth.h>
.text
@@ -51,6 +52,9 @@ alternative_else_nop_endif
add x29, x0, #VCPU_CONTEXT
+ // mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+ mte_switch_to_guest x29, x1, x2
+
// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// when this feature is enabled for kernel code.
ptrauth_switch_to_hyp x1, x2, x3, x4, x5
+ // mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+ mte_switch_to_hyp x1, x2, x3
+
// Restore hyp's sp_el0
restore_sp_el0 x2, x3
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 11541b94b328..0418399e0a20 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= (old & PSR_C_BIT);
new |= (old & PSR_V_BIT);
- // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+ if (kvm_has_mte(vcpu->kvm))
+ new |= PSR_TCO_BIT;
new |= (old & PSR_DIT_BIT);
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5f49df4ffdd8..9aa9b73475c9 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -76,6 +76,7 @@ el1_trap:
b __guest_exit
el1_irq:
+el1_fiq:
get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
@@ -131,7 +132,6 @@ SYM_CODE_END(\label)
invalid_vector el2t_error_invalid
invalid_vector el2h_irq_invalid
invalid_vector el2h_fiq_invalid
- invalid_vector el1_fiq_invalid
.ltorg
@@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector)
valid_vect el1_sync // Synchronous 64-bit EL1
valid_vect el1_irq // IRQ 64-bit EL1
- invalid_vect el1_fiq_invalid // FIQ 64-bit EL1
+ valid_vect el1_fiq // FIQ 64-bit EL1
valid_vect el1_error // Error 64-bit EL1
valid_vect el1_sync // Synchronous 32-bit EL1
valid_vect el1_irq // IRQ 32-bit EL1
- invalid_vect el1_fiq_invalid // FIQ 32-bit EL1
+ valid_vect el1_fiq // FIQ 32-bit EL1
valid_vect el1_error // Error 32-bit EL1
SYM_CODE_END(__kvm_hyp_vector)
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index cce43bfe158f..de7e14c862e6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -14,6 +14,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
}
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+ if (!vcpu)
+ vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+ return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
+ if (ctxt_has_mte(ctxt)) {
+ ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+ ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+ }
+
ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
+ if (ctxt_has_mte(ctxt)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+ write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+ }
+
if (!has_vhe() &&
cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
ctxt->__hyp_running_vcpu) {
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 18a4494337bd..fb0f523d1492 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -7,7 +7,7 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_NO_ORDER UINT_MAX
+#define HYP_NO_ORDER USHRT_MAX
struct hyp_pool {
/*
@@ -19,48 +19,13 @@ struct hyp_pool {
struct list_head free_area[MAX_ORDER];
phys_addr_t range_start;
phys_addr_t range_end;
- unsigned int max_order;
+ unsigned short max_order;
};
-static inline void hyp_page_ref_inc(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- p->refcount++;
- hyp_spin_unlock(&pool->lock);
-}
-
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
- int ret;
-
- hyp_spin_lock(&pool->lock);
- p->refcount--;
- ret = (p->refcount == 0);
- hyp_spin_unlock(&pool->lock);
-
- return ret;
-}
-
-static inline void hyp_set_page_refcounted(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- if (p->refcount) {
- hyp_spin_unlock(&pool->lock);
- BUG();
- }
- p->refcount = 1;
- hyp_spin_unlock(&pool->lock);
-}
-
/* Allocation */
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
-void hyp_get_page(void *addr);
-void hyp_put_page(void *addr);
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_get_page(struct hyp_pool *pool, void *addr);
+void hyp_put_page(struct hyp_pool *pool, void *addr);
/* Used pages cannot be freed */
int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 42d81ec739fa..9c227d87c36d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -23,7 +23,7 @@ extern struct host_kvm host_kvm;
int __pkvm_prot_finalize(void);
int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
+int kvm_host_prepare_stage2(void *pgt_pool_base);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
static __always_inline void __load_host_stage2(void)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index fd78bde939ee..592b7edb3edb 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -7,12 +7,9 @@
#include <linux/types.h>
-struct hyp_pool;
struct hyp_page {
- unsigned int refcount;
- unsigned int order;
- struct hyp_pool *pool;
- struct list_head node;
+ unsigned short refcount;
+ unsigned short order;
};
extern u64 __hyp_vmemmap;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 0095f6289742..8ec3a5a7744b 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void)
return res;
}
-static inline unsigned long host_s2_mem_pgtable_pages(void)
+static inline unsigned long host_s2_pgtable_pages(void)
{
+ unsigned long res;
+
/*
* Include an extra 16 pages to safely upper-bound the worst case of
* concatenated pgds.
*/
- return __hyp_pgtable_total_pages() + 16;
-}
+ res = __hyp_pgtable_total_pages() + 16;
-static inline unsigned long host_s2_dev_pgtable_pages(void)
-{
/* Allow 1 GiB for MMIO mappings */
- return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
}
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 36cef6915428..958734f4d6b0 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -7,7 +7,7 @@
#include <asm/assembler.h>
#include <asm/alternative.h>
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 4b60c0056c04..d938ce95d3bd 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -23,8 +23,7 @@
extern unsigned long hyp_nr_cpus;
struct host_kvm host_kvm;
-static struct hyp_pool host_s2_mem;
-static struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_pool;
/*
* Copies of the host's CPU features registers holding sanitized values.
@@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
{
- return hyp_alloc_pages(&host_s2_mem, get_order(size));
+ return hyp_alloc_pages(&host_s2_pool, get_order(size));
}
static void *host_s2_zalloc_page(void *pool)
@@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool)
return hyp_alloc_pages(pool, 0);
}
-static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
+static void host_s2_get_page(void *addr)
+{
+ hyp_get_page(&host_s2_pool, addr);
+}
+
+static void host_s2_put_page(void *addr)
+{
+ hyp_put_page(&host_s2_pool, addr);
+}
+
+static int prepare_s2_pool(void *pgt_pool_base)
{
unsigned long nr_pages, pfn;
int ret;
- pfn = hyp_virt_to_pfn(mem_pgt_pool);
- nr_pages = host_s2_mem_pgtable_pages();
- ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0);
- if (ret)
- return ret;
-
- pfn = hyp_virt_to_pfn(dev_pgt_pool);
- nr_pages = host_s2_dev_pgtable_pages();
- ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0);
+ pfn = hyp_virt_to_pfn(pgt_pool_base);
+ nr_pages = host_s2_pgtable_pages();
+ ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
if (ret)
return ret;
@@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.page_count = hyp_page_count,
- .get_page = hyp_get_page,
- .put_page = hyp_put_page,
+ .get_page = host_s2_get_page,
+ .put_page = host_s2_put_page,
};
return 0;
@@ -86,7 +89,7 @@ static void prepare_host_vtcr(void)
id_aa64mmfr1_el1_sys_val, phys_shift);
}
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
+int kvm_host_prepare_stage2(void *pgt_pool_base)
{
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
int ret;
@@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
- ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
+ ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
@@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end)
}
static inline int __host_stage2_idmap(u64 start, u64 end,
- enum kvm_pgtable_prot prot,
- struct hyp_pool *pool)
+ enum kvm_pgtable_prot prot)
{
return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
- prot, pool);
+ prot, &host_s2_pool);
}
static int host_stage2_idmap(u64 addr)
@@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr)
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
struct kvm_mem_range range;
bool is_memory = find_mem_range(addr, &range);
- struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev;
int ret;
if (is_memory)
@@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr)
if (ret)
goto unlock;
- ret = __host_stage2_idmap(range.start, range.end, prot, pool);
- if (is_memory || ret != -ENOMEM)
+ ret = __host_stage2_idmap(range.start, range.end, prot);
+ if (ret != -ENOMEM)
goto unlock;
/*
- * host_s2_mem has been provided with enough pages to cover all of
- * memory with page granularity, so we should never hit the ENOMEM case.
- * However, it is difficult to know how much of the MMIO range we will
- * need to cover upfront, so we may need to 'recycle' the pages if we
- * run out.
+ * The pool has been provided with enough pages to cover all of memory
+ * with page granularity, but it is difficult to know how much of the
+ * MMIO range we will need to cover upfront, so we may need to 'recycle'
+ * the pages if we run out.
*/
ret = host_stage2_unmap_dev_all();
if (ret)
goto unlock;
- ret = __host_stage2_idmap(range.start, range.end, prot, pool);
+ ret = __host_stage2_idmap(range.start, range.end, prot);
unlock:
hyp_spin_unlock(&host_kvm.lock);
@@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
hyp_spin_lock(&host_kvm.lock);
ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
- &host_s2_mem, pkvm_hyp_id);
+ &host_s2_pool, pkvm_hyp_id);
hyp_spin_unlock(&host_kvm.lock);
return ret != -EAGAIN ? ret : 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 237e03bf0cb1..41fc25bdfb34 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
*/
static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
phys_addr_t addr = hyp_page_to_phys(p);
@@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
/* Find a buddy page currently available for allocation */
static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
- if (!buddy || buddy->order != order || list_empty(&buddy->node))
+ if (!buddy || buddy->order != order || buddy->refcount)
return NULL;
return buddy;
}
+/*
+ * Pages that are available for allocation are tracked in free-lists, so we use
+ * the pages themselves to store the list nodes to avoid wasting space. As the
+ * allocator always returns zeroed pages (which are zeroed on the hyp_put_page()
+ * path to optimize allocation speed), we also need to clean-up the list node in
+ * each page when we take it out of the list.
+ */
+static inline void page_remove_from_list(struct hyp_page *p)
+{
+ struct list_head *node = hyp_page_to_virt(p);
+
+ __list_del_entry(node);
+ memset(node, 0, sizeof(*node));
+}
+
+static inline void page_add_to_list(struct hyp_page *p, struct list_head *head)
+{
+ struct list_head *node = hyp_page_to_virt(p);
+
+ INIT_LIST_HEAD(node);
+ list_add_tail(node, head);
+}
+
+static inline struct hyp_page *node_to_page(struct list_head *node)
+{
+ return hyp_virt_to_page(node);
+}
+
static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
- unsigned int order = p->order;
+ unsigned short order = p->order;
struct hyp_page *buddy;
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool,
break;
/* Take the buddy out of its list, and coallesce with @p */
- list_del_init(&buddy->node);
+ page_remove_from_list(buddy);
buddy->order = HYP_NO_ORDER;
p = min(p, buddy);
}
/* Mark the new head, and insert it */
p->order = order;
- list_add_tail(&p->node, &pool->free_area[order]);
-}
-
-static void hyp_attach_page(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- __hyp_attach_page(pool, p);
- hyp_spin_unlock(&pool->lock);
+ page_add_to_list(p, &pool->free_area[order]);
}
static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned int order)
+ unsigned short order)
{
struct hyp_page *buddy;
- list_del_init(&p->node);
+ page_remove_from_list(p);
while (p->order > order) {
/*
* The buddy of order n - 1 currently has HYP_NO_ORDER as it
@@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
p->order--;
buddy = __find_buddy_nocheck(pool, p, p->order);
buddy->order = p->order;
- list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
+ page_add_to_list(buddy, &pool->free_area[buddy->order]);
}
return p;
}
-void hyp_put_page(void *addr)
+static inline void hyp_page_ref_inc(struct hyp_page *p)
{
- struct hyp_page *p = hyp_virt_to_page(addr);
+ BUG_ON(p->refcount == USHRT_MAX);
+ p->refcount++;
+}
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+ p->refcount--;
+ return (p->refcount == 0);
+}
+
+static inline void hyp_set_page_refcounted(struct hyp_page *p)
+{
+ BUG_ON(p->refcount);
+ p->refcount = 1;
+}
+
+static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
+{
if (hyp_page_ref_dec_and_test(p))
- hyp_attach_page(p);
+ __hyp_attach_page(pool, p);
+}
+
+/*
+ * Changes to the buddy tree and page refcounts must be done with the hyp_pool
+ * lock held. If a refcount change requires an update to the buddy tree (e.g.
+ * hyp_put_page()), both operations must be done within the same critical
+ * section to guarantee transient states (e.g. a page with null refcount but
+ * not yet attached to a free list) can't be observed by well-behaved readers.
+ */
+void hyp_put_page(struct hyp_pool *pool, void *addr)
+{
+ struct hyp_page *p = hyp_virt_to_page(addr);
+
+ hyp_spin_lock(&pool->lock);
+ __hyp_put_page(pool, p);
+ hyp_spin_unlock(&pool->lock);
}
-void hyp_get_page(void *addr)
+void hyp_get_page(struct hyp_pool *pool, void *addr)
{
struct hyp_page *p = hyp_virt_to_page(addr);
+ hyp_spin_lock(&pool->lock);
hyp_page_ref_inc(p);
+ hyp_spin_unlock(&pool->lock);
}
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
{
- unsigned int i = order;
+ unsigned short i = order;
struct hyp_page *p;
hyp_spin_lock(&pool->lock);
@@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
}
/* Extract it from the tree at the right order */
- p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
+ p = node_to_page(pool->free_area[i].next);
p = __hyp_extract_page(pool, p, order);
- hyp_spin_unlock(&pool->lock);
hyp_set_page_refcounted(p);
+ hyp_spin_unlock(&pool->lock);
return hyp_page_to_virt(p);
}
@@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
/* Init the vmemmap portion */
p = hyp_phys_to_page(phys);
- memset(p, 0, sizeof(*p) * nr_pages);
for (i = 0; i < nr_pages; i++) {
- p[i].pool = pool;
- INIT_LIST_HEAD(&p[i].node);
+ p[i].order = 0;
+ hyp_set_page_refcounted(&p[i]);
}
/* Attach the unused pages to the buddy tree */
for (i = reserved_pages; i < nr_pages; i++)
- __hyp_attach_page(pool, &p[i]);
+ __hyp_put_page(pool, &p[i]);
return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index a3d3a275344e..0b574d106519 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus;
static void *vmemmap_base;
static void *hyp_pgt_base;
-static void *host_s2_mem_pgt_base;
-static void *host_s2_dev_pgt_base;
+static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
static int divide_memory_pool(void *virt, unsigned long size)
@@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size)
if (!hyp_pgt_base)
return -ENOMEM;
- nr_pages = host_s2_mem_pgtable_pages();
- host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
- if (!host_s2_mem_pgt_base)
- return -ENOMEM;
-
- nr_pages = host_s2_dev_pgtable_pages();
- host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
- if (!host_s2_dev_pgt_base)
+ nr_pages = host_s2_pgtable_pages();
+ host_s2_pgt_base = hyp_early_alloc_contig(nr_pages);
+ if (!host_s2_pgt_base)
return -ENOMEM;
return 0;
@@ -134,7 +128,8 @@ static void update_nvhe_init_params(void)
for (i = 0; i < hyp_nr_cpus; i++) {
params = per_cpu_ptr(&kvm_init_params, i);
params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd);
- __flush_dcache_area(params, sizeof(*params));
+ dcache_clean_inval_poc((unsigned long)params,
+ (unsigned long)params + sizeof(*params));
}
}
@@ -143,6 +138,16 @@ static void *hyp_zalloc_hyp_page(void *arg)
return hyp_alloc_pages(&hpool, 0);
}
+static void hpool_get_page(void *addr)
+{
+ hyp_get_page(&hpool, addr);
+}
+
+static void hpool_put_page(void *addr)
+{
+ hyp_put_page(&hpool, addr);
+}
+
void __noreturn __pkvm_init_finalise(void)
{
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -158,7 +163,7 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
+ ret = kvm_host_prepare_stage2(host_s2_pgt_base);
if (ret)
goto out;
@@ -166,8 +171,8 @@ void __noreturn __pkvm_init_finalise(void)
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
- .get_page = hyp_get_page,
- .put_page = hyp_put_page,
+ .get_page = hpool_get_page,
+ .put_page = hpool_put_page,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 83dc3b271bc5..38ed0f6f2703 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
* you should be running with VHE enabled.
*/
if (icache_is_vpipt())
- __flush_icache_all();
+ icache_inval_all_pou();
__tlb_switch_to_host(&cxt);
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index c37c1dc4feaf..05321f4165e3 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
mm_ops->put_page(ptep);
}
+static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
+{
+ u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+ return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+}
+
+static bool stage2_pte_executable(kvm_pte_t pte)
+{
+ return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
struct stage2_map_data *data)
{
kvm_pte_t new, old = *ptep;
u64 granule = kvm_granule_size(level), phys = data->phys;
+ struct kvm_pgtable *pgt = data->mmu->pgt;
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
if (!kvm_block_mapping_supported(addr, end, phys, level))
@@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
}
+ /* Perform CMOs before installation of the guest stage-2 PTE */
+ if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
+ granule);
+
+ if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
+ mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
+
smp_store_release(ptep, new);
if (stage2_pte_is_counted(new))
mm_ops->get_page(ptep);
@@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
return ret;
}
-static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
-{
- u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
- return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
-}
-
static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
@@ -839,8 +853,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
if (need_flush) {
- __flush_dcache_area(kvm_pte_follow(pte, mm_ops),
- kvm_granule_size(level));
+ kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
+
+ dcache_clean_inval_poc((unsigned long)pte_follow,
+ (unsigned long)pte_follow +
+ kvm_granule_size(level));
}
if (childp)
@@ -861,10 +878,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
struct stage2_attr_data {
- kvm_pte_t attr_set;
- kvm_pte_t attr_clr;
- kvm_pte_t pte;
- u32 level;
+ kvm_pte_t attr_set;
+ kvm_pte_t attr_clr;
+ kvm_pte_t pte;
+ u32 level;
+ struct kvm_pgtable_mm_ops *mm_ops;
};
static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -873,6 +891,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
{
kvm_pte_t pte = *ptep;
struct stage2_attr_data *data = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
if (!kvm_pte_valid(pte))
return 0;
@@ -887,8 +906,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
* but worst-case the access flag update gets lost and will be
* set on the next access instead.
*/
- if (data->pte != pte)
+ if (data->pte != pte) {
+ /*
+ * Invalidate instruction cache before updating the guest
+ * stage-2 PTE if we are going to add executable permission.
+ */
+ if (mm_ops->icache_inval_pou &&
+ stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
+ mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
WRITE_ONCE(*ptep, pte);
+ }
return 0;
}
@@ -903,6 +931,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
struct stage2_attr_data data = {
.attr_set = attr_set & attr_mask,
.attr_clr = attr_clr & attr_mask,
+ .mm_ops = pgt->mm_ops,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_attr_walker,
@@ -988,11 +1017,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;
+ kvm_pte_t *pte_follow;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
return 0;
- __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
+ pte_follow = kvm_pte_follow(pte, mm_ops);
+ dcache_clean_inval_poc((unsigned long)pte_follow,
+ (unsigned long)pte_follow +
+ kvm_granule_size(level));
return 0;
}
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c
index 83ca23ac259b..d654921dd09b 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/hyp/reserved_mem.c
@@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void)
}
hyp_mem_pages += hyp_s1_pgtable_pages();
- hyp_mem_pages += host_s2_mem_pgtable_pages();
- hyp_mem_pages += host_s2_dev_pgtable_pages();
+ hyp_mem_pages += host_s2_pgtable_pages();
/*
* The hyp_vmemmap needs to be backed by pages, but these pages
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 5e0d40f9fb86..3155c9e778f0 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys)
return __va(phys);
}
+static void clean_dcache_guest_page(void *va, size_t size)
+{
+ __clean_dcache_guest_page(va, size);
+}
+
+static void invalidate_icache_guest_page(void *va, size_t size)
+{
+ __invalidate_icache_guest_page(va, size);
+}
+
/*
* Unmapping vs dcache management:
*
@@ -432,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.page_count = kvm_host_page_count,
.phys_to_virt = kvm_host_va,
.virt_to_phys = kvm_host_pa,
+ .dcache_clean_inval_poc = clean_dcache_guest_page,
+ .icache_inval_pou = invalidate_icache_guest_page,
};
/**
@@ -693,16 +705,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
-static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
- __clean_dcache_guest_page(pfn, size);
-}
-
-static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
- __invalidate_icache_guest_page(pfn, size);
-}
-
static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
@@ -822,6 +824,74 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
return PAGE_SIZE;
}
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+ unsigned long pa;
+
+ if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+ return huge_page_shift(hstate_vma(vma));
+
+ if (!(vma->vm_flags & VM_PFNMAP))
+ return PAGE_SHIFT;
+
+ VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+ pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+ ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+ ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+ return PUD_SHIFT;
+#endif
+
+ if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+ ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+ ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+ return PMD_SHIFT;
+
+ return PAGE_SHIFT;
+}
+
+/*
+ * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+ * able to see the page's tags and therefore they must be initialised first. If
+ * PG_mte_tagged is set, tags have already been initialised.
+ *
+ * The race in the test/set of the PG_mte_tagged flag is handled by:
+ * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
+ * racing to santise the same page
+ * - mmap_lock protects between a VM faulting a page in and the VMM performing
+ * an mprotect() to add VM_MTE
+ */
+static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+ unsigned long size)
+{
+ unsigned long i, nr_pages = size >> PAGE_SHIFT;
+ struct page *page;
+
+ if (!kvm_has_mte(kvm))
+ return 0;
+
+ /*
+ * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+ * that may not support tags.
+ */
+ page = pfn_to_online_page(pfn);
+
+ if (!page)
+ return -EFAULT;
+
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (!test_bit(PG_mte_tagged, &page->flags)) {
+ mte_clear_page_tags(page_address(page));
+ set_bit(PG_mte_tagged, &page->flags);
+ }
+ }
+
+ return 0;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -830,6 +900,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
bool write_fault, writable, force_pte = false;
bool exec_fault;
bool device = false;
+ bool shared;
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -853,7 +924,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- /* Let's check if we will get back a huge page backed by hugetlbfs */
+ /*
+ * Let's check if we will get back a huge page backed by hugetlbfs, or
+ * get block mapping for device MMIO region.
+ */
mmap_read_lock(current->mm);
vma = vma_lookup(current->mm, hva);
if (unlikely(!vma)) {
@@ -862,17 +936,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma))
- vma_shift = huge_page_shift(hstate_vma(vma));
- else
- vma_shift = PAGE_SHIFT;
-
- if (logging_active ||
- (vma->vm_flags & VM_PFNMAP)) {
+ /*
+ * logging_active is guaranteed to never be true for VM_PFNMAP
+ * memslots.
+ */
+ if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
+ } else {
+ vma_shift = get_vma_page_shift(vma, hva);
}
+ shared = (vma->vm_flags & VM_PFNMAP);
+
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
@@ -943,8 +1019,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
if (kvm_is_device_pfn(pfn)) {
+ /*
+ * If the page was identified as device early by looking at
+ * the VMA flags, vma_pagesize is already representing the
+ * largest quantity we can map. If instead it was mapped
+ * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+ * and must not be upgraded.
+ *
+ * In both cases, we don't let transparent_hugepage_adjust()
+ * change things at the last minute.
+ */
device = true;
- force_pte = true;
} else if (logging_active && !write_fault) {
/*
* Only actually map the page as writable if this was a write
@@ -965,19 +1050,25 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !force_pte)
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
+
+ if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+ /* Check the VMM hasn't introduced a new VM_SHARED VMA */
+ if (!shared)
+ ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+ else
+ ret = -EFAULT;
+ if (ret)
+ goto out_unlock;
+ }
+
if (writable)
prot |= KVM_PGTABLE_PROT_W;
- if (fault_status != FSC_PERM && !device)
- clean_dcache_guest_page(pfn, vma_pagesize);
-
- if (exec_fault) {
+ if (exec_fault)
prot |= KVM_PGTABLE_PROT_X;
- invalidate_icache_guest_page(pfn, vma_pagesize);
- }
if (device)
prot |= KVM_PGTABLE_PROT_DEVICE;
@@ -1168,19 +1259,22 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_pfn_t pfn = pte_pfn(range->pte);
+ int ret;
if (!kvm->arch.mmu.pgt)
return false;
WARN_ON(range->end - range->start != 1);
- /*
- * We've moved a page around, probably through CoW, so let's treat it
- * just like a translation fault and clean the cache to the PoC.
- */
- clean_dcache_guest_page(pfn, PAGE_SIZE);
+ ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+ if (ret)
+ return false;
/*
+ * We've moved a page around, probably through CoW, so let's treat
+ * it just like a translation fault and the map handler will clean
+ * the cache to the PoC.
+ *
* The MMU notifiers will have unmapped a huge PMD before calling
* ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
* therefore we never need to clear out a huge PMD through this
@@ -1346,7 +1440,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
hva_t hva = mem->userspace_addr;
hva_t reg_end = hva + mem->memory_size;
- bool writable = !(mem->flags & KVM_MEM_READONLY);
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1456,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and any holes
- * between them, so iterate over all of them to find out if we can map
- * any of them right now.
+ * between them, so iterate over all of them.
*
* +--------------------------------------------+
* +---------------+----------------+ +----------------+
@@ -1375,51 +1467,29 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
*/
do {
struct vm_area_struct *vma;
- hva_t vm_start, vm_end;
vma = find_vma_intersection(current->mm, hva, reg_end);
if (!vma)
break;
/*
- * Take the intersection of this VMA with the memory region
+ * VM_SHARED mappings are not allowed with MTE to avoid races
+ * when updating the PG_mte_tagged page flag, see
+ * sanitise_mte_tags for more details.
*/
- vm_start = max(hva, vma->vm_start);
- vm_end = min(reg_end, vma->vm_end);
+ if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+ return -EINVAL;
if (vma->vm_flags & VM_PFNMAP) {
- gpa_t gpa = mem->guest_phys_addr +
- (vm_start - mem->userspace_addr);
- phys_addr_t pa;
-
- pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
- pa += vm_start - vma->vm_start;
-
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
- goto out;
- }
-
- ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
- vm_end - vm_start,
- writable);
- if (ret)
break;
+ }
}
- hva = vm_end;
+ hva = min(reg_end, vma->vm_end);
} while (hva < reg_end);
- if (change == KVM_MR_FLAGS_ONLY)
- goto out;
-
- spin_lock(&kvm->mmu_lock);
- if (ret)
- unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
- else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
- stage2_flush_memslot(kvm, memslot);
- spin_unlock(&kvm->mmu_lock);
-out:
mmap_read_unlock(current->mm);
return ret;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index fd167d4f4215..f33825c995cb 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
+ mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_counter_value(vcpu, i, 0);
}
@@ -850,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
return -EINVAL;
}
+ /* One-off reload of the PMU on first run */
+ kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+
return 0;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index d37ebee085cf..cba7872d69a8 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
return false;
+ /* MTE is incompatible with AArch32 */
+ if (kvm_has_mte(vcpu->kvm) && is32bit)
+ return false;
+
/* Check that the vcpus are either all 32bit or all 64bit */
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1a7968ad078c..f6f126eb6ac1 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64PFR1_EL1:
val &= ~FEATURE(ID_AA64PFR1_MTE);
+ if (kvm_has_mte(vcpu->kvm)) {
+ u64 pfr, mte;
+
+ pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
+ val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
+ }
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1302,6 +1309,23 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_mte(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+#define MTE_REG(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = undef_access, \
+ .reset = reset_unknown, \
+ .reg = name, \
+ .visibility = mte_visibility, \
+}
+
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
SYS_DESC(SYS_##name), \
@@ -1470,8 +1494,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
- { SYS_DESC(SYS_RGSR_EL1), undef_access },
- { SYS_DESC(SYS_GCR_EL1), undef_access },
+ MTE_REG(RGSR_EL1),
+ MTE_REG(GCR_EL1),
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1498,8 +1522,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
- { SYS_DESC(SYS_TFSR_EL1), undef_access },
- { SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+ MTE_REG(TFSR_EL1),
+ MTE_REG(TFSRE0_EL1),
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 58cbda00e56d..340c51d87677 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
+static struct gic_kvm_info *gic_kvm_info;
+
+void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
+{
+ BUG_ON(gic_kvm_info != NULL);
+ gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (gic_kvm_info)
+ *gic_kvm_info = *info;
+}
+
/**
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
*
@@ -509,18 +519,29 @@ void kvm_vgic_init_cpu_hardware(void)
*/
int kvm_vgic_hyp_init(void)
{
- const struct gic_kvm_info *gic_kvm_info;
+ bool has_mask;
int ret;
- gic_kvm_info = gic_get_kvm_info();
if (!gic_kvm_info)
return -ENODEV;
- if (!gic_kvm_info->maint_irq) {
+ has_mask = !gic_kvm_info->no_maint_irq_mask;
+
+ if (has_mask && !gic_kvm_info->maint_irq) {
kvm_err("No vgic maintenance irq\n");
return -ENXIO;
}
+ /*
+ * If we get one of these oddball non-GICs, taint the kernel,
+ * as we have no idea of how they *really* behave.
+ */
+ if (gic_kvm_info->no_hw_deactivation) {
+ kvm_info("Non-architectural vgic, tainting kernel\n");
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ kvm_vgic_global_state.no_hw_deactivation = true;
+ }
+
switch (gic_kvm_info->type) {
case GIC_V2:
ret = vgic_v2_probe(gic_kvm_info);
@@ -536,10 +557,17 @@ int kvm_vgic_hyp_init(void)
ret = -ENODEV;
}
+ kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+
+ kfree(gic_kvm_info);
+ gic_kvm_info = NULL;
+
if (ret)
return ret;
- kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+ if (!has_mask)
+ return 0;
+
ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
vgic_maintenance_handler,
"vgic", kvm_get_running_vcpus());
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 11934c2af2f4..2c580204f1dc 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
* If this causes us to lower the level, we have to also clear
* the physical active state, since we will otherwise never be
* told when the interrupt becomes asserted again.
+ *
+ * Another case is when the interrupt requires a helping hand
+ * on deactivation (no HW deactivation, for example).
*/
- if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
- irq->line_level = vgic_get_phys_line_level(irq);
+ if (vgic_irq_is_mapped_level(irq)) {
+ bool resample = false;
+
+ if (val & GICH_LR_PENDING_BIT) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+ resample = !irq->line_level;
+ } else if (vgic_irq_needs_resampling(irq) &&
+ !(irq->active || irq->pending_latch)) {
+ resample = true;
+ }
- if (!irq->line_level)
+ if (resample)
vgic_irq_set_phys_active(irq, false);
}
@@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (irq->group)
val |= GICH_LR_GROUP1;
- if (irq->hw) {
+ if (irq->hw && !vgic_irq_needs_resampling(irq)) {
val |= GICH_LR_HW;
val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
/*
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 41ecf219c333..66004f61cd83 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
* If this causes us to lower the level, we have to also clear
* the physical active state, since we will otherwise never be
* told when the interrupt becomes asserted again.
+ *
+ * Another case is when the interrupt requires a helping hand
+ * on deactivation (no HW deactivation, for example).
*/
- if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
- irq->line_level = vgic_get_phys_line_level(irq);
+ if (vgic_irq_is_mapped_level(irq)) {
+ bool resample = false;
+
+ if (val & ICH_LR_PENDING_BIT) {
+ irq->line_level = vgic_get_phys_line_level(irq);
+ resample = !irq->line_level;
+ } else if (vgic_irq_needs_resampling(irq) &&
+ !(irq->active || irq->pending_latch)) {
+ resample = true;
+ }
- if (!irq->line_level)
+ if (resample)
vgic_irq_set_phys_active(irq, false);
}
@@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
}
}
- if (irq->hw) {
+ if (irq->hw && !vgic_irq_needs_resampling(irq)) {
val |= ICH_LR_HW;
val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
/*
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 15b666200f0b..111bff47e471 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq)
BUG_ON(!irq->hw);
- if (irq->get_input_level)
- return irq->get_input_level(irq->intid);
+ if (irq->ops && irq->ops->get_input_level)
+ return irq->ops->get_input_level(irq->intid);
WARN_ON(irq_get_irqchip_state(irq->host_irq,
IRQCHIP_STATE_PENDING,
@@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
unsigned int host_irq,
- bool (*get_input_level)(int vindid))
+ struct irq_ops *ops)
{
struct irq_desc *desc;
struct irq_data *data;
@@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
irq->hw = true;
irq->host_irq = host_irq;
irq->hwintid = data->hwirq;
- irq->get_input_level = get_input_level;
+ irq->ops = ops;
return 0;
}
@@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
irq->hw = false;
irq->hwintid = 0;
- irq->get_input_level = NULL;
+ irq->ops = NULL;
}
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid, bool (*get_input_level)(int vindid))
+ u32 vintid, struct irq_ops *ops)
{
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
unsigned long flags;
@@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
BUG_ON(!irq);
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
+ ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index d31e1169d9b8..6dd56a49790a 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
lib-y := clear_user.o delay.o copy_from_user.o \
copy_to_user.o copy_in_user.o copy_page.o \
- clear_page.o csum.o memchr.o memcpy.o memmove.o \
+ clear_page.o csum.o insn.o memchr.o memcpy.o \
memset.o memcmp.o strcmp.o strncmp.o strlen.o \
strnlen.o strchr.o strrchr.o tishift.o
@@ -18,3 +18,5 @@ obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_ARM64_MTE) += mte.o
+
+obj-$(CONFIG_KASAN_SW_TAGS) += kasan_sw_tags.o
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index af9afcbec92c..a7efb2ad2a1c 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -1,12 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Based on arch/arm/lib/clear_user.S
- *
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2021 Arm Ltd.
*/
-#include <linux/linkage.h>
-#include <asm/asm-uaccess.h>
+#include <linux/linkage.h>
#include <asm/assembler.h>
.text
@@ -19,25 +16,33 @@
*
* Alignment fixed up by hardware.
*/
+
+ .p2align 4
+ // Alignment is for the loop, but since the prologue (including BTI)
+ // is also 16 bytes we can keep any padding outside the function
SYM_FUNC_START(__arch_clear_user)
- mov x2, x1 // save the size for fixup return
+ add x2, x0, x1
subs x1, x1, #8
b.mi 2f
1:
-user_ldst 9f, sttr, xzr, x0, 8
+USER(9f, sttr xzr, [x0])
+ add x0, x0, #8
subs x1, x1, #8
- b.pl 1b
-2: adds x1, x1, #4
- b.mi 3f
-user_ldst 9f, sttr, wzr, x0, 4
- sub x1, x1, #4
-3: adds x1, x1, #2
- b.mi 4f
-user_ldst 9f, sttrh, wzr, x0, 2
- sub x1, x1, #2
-4: adds x1, x1, #1
- b.mi 5f
-user_ldst 9f, sttrb, wzr, x0, 0
+ b.hi 1b
+USER(9f, sttr xzr, [x2, #-8])
+ mov x0, #0
+ ret
+
+2: tbz x1, #2, 3f
+USER(9f, sttr wzr, [x0])
+USER(8f, sttr wzr, [x2, #-4])
+ mov x0, #0
+ ret
+
+3: tbz x1, #1, 4f
+USER(9f, sttrh wzr, [x0])
+4: tbz x1, #0, 5f
+USER(7f, sttrb wzr, [x2, #-1])
5: mov x0, #0
ret
SYM_FUNC_END(__arch_clear_user)
@@ -45,6 +50,8 @@ EXPORT_SYMBOL(__arch_clear_user)
.section .fixup,"ax"
.align 2
-9: mov x0, x2 // return the original size
+7: sub x0, x2, #5 // Adjust for faulting on the final byte...
+8: add x0, x0, #4 // ...or the second word of the 4-7 byte case
+9: sub x0, x2, x0
ret
.previous
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/lib/insn.c
index 6c0de2f60ea9..b506a4b1e38c 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -7,21 +7,14 @@
*/
#include <linux/bitops.h>
#include <linux/bug.h>
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/stop_machine.h>
+#include <linux/printk.h>
+#include <linux/sizes.h>
#include <linux/types.h>
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
-#include <asm/fixmap.h>
+#include <asm/errno.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
-#include <asm/sections.h>
#define AARCH64_INSN_SF_BIT BIT(31)
#define AARCH64_INSN_N_BIT BIT(22)
@@ -30,7 +23,7 @@
static const int aarch64_insn_encoding_class[] = {
AARCH64_INSN_CLS_UNKNOWN,
AARCH64_INSN_CLS_UNKNOWN,
- AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_SVE,
AARCH64_INSN_CLS_UNKNOWN,
AARCH64_INSN_CLS_LDST,
AARCH64_INSN_CLS_DP_REG,
@@ -83,81 +76,6 @@ bool aarch64_insn_is_branch_imm(u32 insn)
aarch64_insn_is_bcond(insn));
}
-static DEFINE_RAW_SPINLOCK(patch_lock);
-
-static bool is_exit_text(unsigned long addr)
-{
- /* discarded with init text/data */
- return system_state < SYSTEM_RUNNING &&
- addr >= (unsigned long)__exittext_begin &&
- addr < (unsigned long)__exittext_end;
-}
-
-static bool is_image_text(unsigned long addr)
-{
- return core_kernel_text(addr) || is_exit_text(addr);
-}
-
-static void __kprobes *patch_map(void *addr, int fixmap)
-{
- unsigned long uintaddr = (uintptr_t) addr;
- bool image = is_image_text(uintaddr);
- struct page *page;
-
- if (image)
- page = phys_to_page(__pa_symbol(addr));
- else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
- page = vmalloc_to_page(addr);
- else
- return addr;
-
- BUG_ON(!page);
- return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
- (uintaddr & ~PAGE_MASK));
-}
-
-static void __kprobes patch_unmap(int fixmap)
-{
- clear_fixmap(fixmap);
-}
-/*
- * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
- * little-endian.
- */
-int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
-{
- int ret;
- __le32 val;
-
- ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE);
- if (!ret)
- *insnp = le32_to_cpu(val);
-
- return ret;
-}
-
-static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
-{
- void *waddr = addr;
- unsigned long flags = 0;
- int ret;
-
- raw_spin_lock_irqsave(&patch_lock, flags);
- waddr = patch_map(addr, FIX_TEXT_POKE0);
-
- ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
-
- patch_unmap(FIX_TEXT_POKE0);
- raw_spin_unlock_irqrestore(&patch_lock, flags);
-
- return ret;
-}
-
-int __kprobes aarch64_insn_write(void *addr, u32 insn)
-{
- return __aarch64_insn_write(addr, cpu_to_le32(insn));
-}
-
bool __kprobes aarch64_insn_uses_literal(u32 insn)
{
/* ldr/ldrsw (literal), prfm */
@@ -187,67 +105,6 @@ bool __kprobes aarch64_insn_is_branch(u32 insn)
aarch64_insn_is_bcond(insn);
}
-int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
-{
- u32 *tp = addr;
- int ret;
-
- /* A64 instructions must be word aligned */
- if ((uintptr_t)tp & 0x3)
- return -EINVAL;
-
- ret = aarch64_insn_write(tp, insn);
- if (ret == 0)
- __flush_icache_range((uintptr_t)tp,
- (uintptr_t)tp + AARCH64_INSN_SIZE);
-
- return ret;
-}
-
-struct aarch64_insn_patch {
- void **text_addrs;
- u32 *new_insns;
- int insn_cnt;
- atomic_t cpu_count;
-};
-
-static int __kprobes aarch64_insn_patch_text_cb(void *arg)
-{
- int i, ret = 0;
- struct aarch64_insn_patch *pp = arg;
-
- /* The first CPU becomes master */
- if (atomic_inc_return(&pp->cpu_count) == 1) {
- for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
- ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
- pp->new_insns[i]);
- /* Notify other processors with an additional increment. */
- atomic_inc(&pp->cpu_count);
- } else {
- while (atomic_read(&pp->cpu_count) <= num_online_cpus())
- cpu_relax();
- isb();
- }
-
- return ret;
-}
-
-int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
-{
- struct aarch64_insn_patch patch = {
- .text_addrs = addrs,
- .new_insns = insns,
- .insn_cnt = cnt,
- .cpu_count = ATOMIC_INIT(0),
- };
-
- if (cnt <= 0)
- return -EINVAL;
-
- return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
- cpu_online_mask);
-}
-
static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
u32 *maskp, int *shiftp)
{
@@ -1432,104 +1289,6 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn)
return insn & CRM_MASK;
}
-static bool __kprobes __check_eq(unsigned long pstate)
-{
- return (pstate & PSR_Z_BIT) != 0;
-}
-
-static bool __kprobes __check_ne(unsigned long pstate)
-{
- return (pstate & PSR_Z_BIT) == 0;
-}
-
-static bool __kprobes __check_cs(unsigned long pstate)
-{
- return (pstate & PSR_C_BIT) != 0;
-}
-
-static bool __kprobes __check_cc(unsigned long pstate)
-{
- return (pstate & PSR_C_BIT) == 0;
-}
-
-static bool __kprobes __check_mi(unsigned long pstate)
-{
- return (pstate & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_pl(unsigned long pstate)
-{
- return (pstate & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_vs(unsigned long pstate)
-{
- return (pstate & PSR_V_BIT) != 0;
-}
-
-static bool __kprobes __check_vc(unsigned long pstate)
-{
- return (pstate & PSR_V_BIT) == 0;
-}
-
-static bool __kprobes __check_hi(unsigned long pstate)
-{
- pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
- return (pstate & PSR_C_BIT) != 0;
-}
-
-static bool __kprobes __check_ls(unsigned long pstate)
-{
- pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
- return (pstate & PSR_C_BIT) == 0;
-}
-
-static bool __kprobes __check_ge(unsigned long pstate)
-{
- pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
- return (pstate & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_lt(unsigned long pstate)
-{
- pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
- return (pstate & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_gt(unsigned long pstate)
-{
- /*PSR_N_BIT ^= PSR_V_BIT */
- unsigned long temp = pstate ^ (pstate << 3);
-
- temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
- return (temp & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_le(unsigned long pstate)
-{
- /*PSR_N_BIT ^= PSR_V_BIT */
- unsigned long temp = pstate ^ (pstate << 3);
-
- temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
- return (temp & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_al(unsigned long pstate)
-{
- return true;
-}
-
-/*
- * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
- * it behaves identically to 0b1110 ("al").
- */
-pstate_check_t * const aarch32_opcode_cond_checks[16] = {
- __check_eq, __check_ne, __check_cs, __check_cc,
- __check_mi, __check_pl, __check_vs, __check_vc,
- __check_hi, __check_ls, __check_ge, __check_lt,
- __check_gt, __check_le, __check_al, __check_al
-};
-
static bool range_of_ones(u64 val)
{
/* Doesn't handle full ones or full zeroes */
diff --git a/arch/arm64/lib/kasan_sw_tags.S b/arch/arm64/lib/kasan_sw_tags.S
new file mode 100644
index 000000000000..5b04464c045e
--- /dev/null
+++ b/arch/arm64/lib/kasan_sw_tags.S
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Google LLC
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Report a tag mismatch detected by tag-based KASAN.
+ *
+ * A compiler-generated thunk calls this with a non-AAPCS calling
+ * convention. Upon entry to this function, registers are as follows:
+ *
+ * x0: fault address (see below for restore)
+ * x1: fault description (see below for restore)
+ * x2 to x15: callee-saved
+ * x16 to x17: safe to clobber
+ * x18 to x30: callee-saved
+ * sp: pre-decremented by 256 bytes (see below for restore)
+ *
+ * The caller has decremented the SP by 256 bytes, and created a
+ * structure on the stack as follows:
+ *
+ * sp + 0..15: x0 and x1 to be restored
+ * sp + 16..231: free for use
+ * sp + 232..247: x29 and x30 (same as in GPRs)
+ * sp + 248..255: free for use
+ *
+ * Note that this is not a struct pt_regs.
+ *
+ * To call a regular AAPCS function we must save x2 to x15 (which we can
+ * store in the gaps), and create a frame record (for which we can use
+ * x29 and x30 spilled by the caller as those match the GPRs).
+ *
+ * The caller expects x0 and x1 to be restored from the structure, and
+ * for the structure to be removed from the stack (i.e. the SP must be
+ * incremented by 256 prior to return).
+ */
+SYM_CODE_START(__hwasan_tag_mismatch)
+#ifdef BTI_C
+ BTI_C
+#endif
+ add x29, sp, #232
+ stp x2, x3, [sp, #8 * 2]
+ stp x4, x5, [sp, #8 * 4]
+ stp x6, x7, [sp, #8 * 6]
+ stp x8, x9, [sp, #8 * 8]
+ stp x10, x11, [sp, #8 * 10]
+ stp x12, x13, [sp, #8 * 12]
+ stp x14, x15, [sp, #8 * 14]
+#ifndef CONFIG_SHADOW_CALL_STACK
+ str x18, [sp, #8 * 18]
+#endif
+
+ mov x2, x30
+ bl kasan_tag_mismatch
+
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #8 * 2]
+ ldp x4, x5, [sp, #8 * 4]
+ ldp x6, x7, [sp, #8 * 6]
+ ldp x8, x9, [sp, #8 * 8]
+ ldp x10, x11, [sp, #8 * 10]
+ ldp x12, x13, [sp, #8 * 12]
+ ldp x14, x15, [sp, #8 * 14]
+#ifndef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [sp, #8 * 18]
+#endif
+ ldp x29, x30, [sp, #8 * 29]
+
+ /* remove the structure from the stack */
+ add sp, sp, #256
+ ret
+SYM_CODE_END(__hwasan_tag_mismatch)
+EXPORT_SYMBOL(__hwasan_tag_mismatch)
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index edf6b970a277..7c2276fdab54 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -1,9 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Based on arch/arm/lib/memchr.S
- *
- * Copyright (C) 1995-2000 Russell King
- * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2021 Arm Ltd.
*/
#include <linux/linkage.h>
@@ -19,16 +16,60 @@
* Returns:
* x0 - address of first occurrence of 'c' or 0
*/
+
+#define L(label) .L ## label
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+
+#define srcin x0
+#define chrin w1
+#define cntin x2
+
+#define result x0
+
+#define wordcnt x3
+#define rep01 x4
+#define repchr x5
+#define cur_word x6
+#define cur_byte w6
+#define tmp x7
+#define tmp2 x8
+
+ .p2align 4
+ nop
SYM_FUNC_START_WEAK_PI(memchr)
- and w1, w1, #0xff
-1: subs x2, x2, #1
- b.mi 2f
- ldrb w3, [x0], #1
- cmp w3, w1
- b.ne 1b
- sub x0, x0, #1
+ and chrin, chrin, #0xff
+ lsr wordcnt, cntin, #3
+ cbz wordcnt, L(byte_loop)
+ mov rep01, #REP8_01
+ mul repchr, x1, rep01
+ and cntin, cntin, #7
+L(word_loop):
+ ldr cur_word, [srcin], #8
+ sub wordcnt, wordcnt, #1
+ eor cur_word, cur_word, repchr
+ sub tmp, cur_word, rep01
+ orr tmp2, cur_word, #REP8_7f
+ bics tmp, tmp, tmp2
+ b.ne L(found_word)
+ cbnz wordcnt, L(word_loop)
+L(byte_loop):
+ cbz cntin, L(not_found)
+ ldrb cur_byte, [srcin], #1
+ sub cntin, cntin, #1
+ cmp cur_byte, chrin
+ b.ne L(byte_loop)
+ sub srcin, srcin, #1
+ ret
+L(found_word):
+CPU_LE( rev tmp, tmp)
+ clz tmp, tmp
+ sub tmp, tmp, #64
+ add result, srcin, tmp, asr #3
ret
-2: mov x0, #0
+L(not_found):
+ mov result, #0
ret
SYM_FUNC_END_PI(memchr)
EXPORT_SYMBOL_NOKASAN(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index c0671e793ea9..7d956384222f 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -1,247 +1,139 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013-2021, Arm Limited.
*
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-/*
-* compare memory areas(when two memory areas' offset are different,
-* alignment handled by the hardware)
-*
-* Parameters:
-* x0 - const memory area 1 pointer
-* x1 - const memory area 2 pointer
-* x2 - the maximal compare byte length
-* Returns:
-* x0 - a compare result, maybe less than, equal to, or greater than ZERO
-*/
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ */
+
+#define L(label) .L ## label
/* Parameters and result. */
-src1 .req x0
-src2 .req x1
-limit .req x2
-result .req x0
+#define src1 x0
+#define src2 x1
+#define limit x2
+#define result w0
/* Internal variables. */
-data1 .req x3
-data1w .req w3
-data2 .req x4
-data2w .req w4
-has_nul .req x5
-diff .req x6
-endloop .req x7
-tmp1 .req x8
-tmp2 .req x9
-tmp3 .req x10
-pos .req x11
-limit_wd .req x12
-mask .req x13
+#define data1 x3
+#define data1w w3
+#define data1h x4
+#define data2 x5
+#define data2w w5
+#define data2h x6
+#define tmp1 x7
+#define tmp2 x8
SYM_FUNC_START_WEAK_PI(memcmp)
- cbz limit, .Lret0
- eor tmp1, src1, src2
- tst tmp1, #7
- b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
- /*
- * The input source addresses are at alignment boundary.
- * Directly compare eight bytes each time.
- */
-.Lloop_aligned:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned:
- subs limit_wd, limit_wd, #1
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, cs /* Last Dword or differences. */
- cbz endloop, .Lloop_aligned
-
- /* Not reached the limit, must have found a diff. */
- tbz limit_wd, #63, .Lnot_limit
-
- /* Limit % 8 == 0 => the diff is in the last 8 bytes. */
- ands limit, limit, #7
- b.eq .Lnot_limit
- /*
- * The remained bytes less than 8. It is needed to extract valid data
- * from last eight bytes of the intended memory range.
- */
- lsl limit, limit, #3 /* bytes-> bits. */
- mov mask, #~0
-CPU_BE( lsr mask, mask, limit )
-CPU_LE( lsl mask, mask, limit )
- bic data1, data1, mask
- bic data2, data2, mask
-
- orr diff, diff, mask
- b .Lnot_limit
-
-.Lmutual_align:
- /*
- * Sources are mutually aligned, but are not currently at an
- * alignment boundary. Round down the addresses and then mask off
- * the bytes that precede the start point.
- */
- bic src1, src1, #7
- bic src2, src2, #7
- ldr data1, [src1], #8
- ldr data2, [src2], #8
- /*
- * We can not add limit with alignment offset(tmp1) here. Since the
- * addition probably make the limit overflown.
- */
- sub limit_wd, limit, #1/*limit != 0, so no underflow.*/
- and tmp3, limit_wd, #7
- lsr limit_wd, limit_wd, #3
- add tmp3, tmp3, tmp1
- add limit_wd, limit_wd, tmp3, lsr #3
- add limit, limit, tmp1/* Adjust the limit for the extra. */
-
- lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/
- neg tmp1, tmp1/* Bits to alignment -64. */
- mov tmp2, #~0
- /*mask off the non-intended bytes before the start address.*/
-CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/
- /* Little-endian. Early bytes are at LSB. */
-CPU_LE( lsr tmp2, tmp2, tmp1 )
-
- orr data1, data1, tmp2
- orr data2, data2, tmp2
- b .Lstart_realigned
-
- /*src1 and src2 have different alignment offset.*/
-.Lmisaligned8:
- cmp limit, #8
- b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/
-
- and tmp1, src1, #7
- neg tmp1, tmp1
- add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/
- and tmp2, src2, #7
- neg tmp2, tmp2
- add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/
- subs tmp3, tmp1, tmp2
- csel pos, tmp1, tmp2, hi /*Choose the maximum.*/
-
- sub limit, limit, pos
- /*compare the proceeding bytes in the first 8 byte segment.*/
-.Ltinycmp:
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- subs pos, pos, #1
- ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
- b.eq .Ltinycmp
- cbnz pos, 1f /*diff occurred before the last byte.*/
- cmp data1w, data2w
- b.eq .Lstart_align
-1:
- sub result, data1, data2
+ subs limit, limit, 8
+ b.lo L(less8)
+
+ ldr data1, [src1], 8
+ ldr data2, [src2], 8
+ cmp data1, data2
+ b.ne L(return)
+
+ subs limit, limit, 8
+ b.gt L(more16)
+
+ ldr data1, [src1, limit]
+ ldr data2, [src2, limit]
+ b L(return)
+
+L(more16):
+ ldr data1, [src1], 8
+ ldr data2, [src2], 8
+ cmp data1, data2
+ bne L(return)
+
+ /* Jump directly to comparing the last 16 bytes for 32 byte (or less)
+ strings. */
+ subs limit, limit, 16
+ b.ls L(last_bytes)
+
+ /* We overlap loads between 0-32 bytes at either side of SRC1 when we
+ try to align, so limit it only to strings larger than 128 bytes. */
+ cmp limit, 96
+ b.ls L(loop16)
+
+ /* Align src1 and adjust src2 with bytes not yet done. */
+ and tmp1, src1, 15
+ add limit, limit, tmp1
+ sub src1, src1, tmp1
+ sub src2, src2, tmp1
+
+ /* Loop performing 16 bytes per iteration using aligned src1.
+ Limit is pre-decremented by 16 and must be larger than zero.
+ Exit if <= 16 bytes left to do or if the data is not equal. */
+ .p2align 4
+L(loop16):
+ ldp data1, data1h, [src1], 16
+ ldp data2, data2h, [src2], 16
+ subs limit, limit, 16
+ ccmp data1, data2, 0, hi
+ ccmp data1h, data2h, 0, eq
+ b.eq L(loop16)
+
+ cmp data1, data2
+ bne L(return)
+ mov data1, data1h
+ mov data2, data2h
+ cmp data1, data2
+ bne L(return)
+
+ /* Compare last 1-16 bytes using unaligned access. */
+L(last_bytes):
+ add src1, src1, limit
+ add src2, src2, limit
+ ldp data1, data1h, [src1]
+ ldp data2, data2h, [src2]
+ cmp data1, data2
+ bne L(return)
+ mov data1, data1h
+ mov data2, data2h
+ cmp data1, data2
+
+ /* Compare data bytes and set return value to 0, -1 or 1. */
+L(return):
+#ifndef __AARCH64EB__
+ rev data1, data1
+ rev data2, data2
+#endif
+ cmp data1, data2
+L(ret_eq):
+ cset result, ne
+ cneg result, result, lo
ret
-.Lstart_align:
- lsr limit_wd, limit, #3
- cbz limit_wd, .Lremain8
-
- ands xzr, src1, #7
- b.eq .Lrecal_offset
- /*process more leading bytes to make src1 aligned...*/
- add src1, src1, tmp3 /*backwards src1 to alignment boundary*/
- add src2, src2, tmp3
- sub limit, limit, tmp3
- lsr limit_wd, limit, #3
- cbz limit_wd, .Lremain8
- /*load 8 bytes from aligned SRC1..*/
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-
- subs limit_wd, limit_wd, #1
- eor diff, data1, data2 /*Non-zero if differences found.*/
- csinv endloop, diff, xzr, ne
- cbnz endloop, .Lunequal_proc
- /*How far is the current SRC2 from the alignment boundary...*/
- and tmp3, tmp3, #7
-
-.Lrecal_offset:/*src1 is aligned now..*/
- neg pos, tmp3
-.Lloopcmp_proc:
- /*
- * Divide the eight bytes into two parts. First,backwards the src2
- * to an alignment boundary,load eight bytes and compare from
- * the SRC2 alignment boundary. If all 8 bytes are equal,then start
- * the second part's comparison. Otherwise finish the comparison.
- * This special handle can garantee all the accesses are in the
- * thread/task space in avoid to overrange access.
- */
- ldr data1, [src1,pos]
- ldr data2, [src2,pos]
- eor diff, data1, data2 /* Non-zero if differences found. */
- cbnz diff, .Lnot_limit
-
- /*The second part process*/
- ldr data1, [src1], #8
- ldr data2, [src2], #8
- eor diff, data1, data2 /* Non-zero if differences found. */
- subs limit_wd, limit_wd, #1
- csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
- cbz endloop, .Lloopcmp_proc
-.Lunequal_proc:
- cbz diff, .Lremain8
-
-/* There is difference occurred in the latest comparison. */
-.Lnot_limit:
-/*
-* For little endian,reverse the low significant equal bits into MSB,then
-* following CLZ can find how many equal bits exist.
-*/
-CPU_LE( rev diff, diff )
-CPU_LE( rev data1, data1 )
-CPU_LE( rev data2, data2 )
-
- /*
- * The MS-non-zero bit of DIFF marks either the first bit
- * that is different, or the end of the significant data.
- * Shifting left now will bring the critical information into the
- * top bits.
- */
- clz pos, diff
- lsl data1, data1, pos
- lsl data2, data2, pos
- /*
- * We need to zero-extend (char is unsigned) the value and then
- * perform a signed subtraction.
- */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
+ .p2align 4
+ /* Compare up to 8 bytes. Limit is [-8..-1]. */
+L(less8):
+ adds limit, limit, 4
+ b.lo L(less4)
+ ldr data1w, [src1], 4
+ ldr data2w, [src2], 4
+ cmp data1w, data2w
+ b.ne L(return)
+ sub limit, limit, 4
+L(less4):
+ adds limit, limit, 4
+ beq L(ret_eq)
+L(byte_loop):
+ ldrb data1w, [src1], 1
+ ldrb data2w, [src2], 1
+ subs limit, limit, 1
+ ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
+ b.eq L(byte_loop)
+ sub result, data1w, data2w
ret
-.Lremain8:
- /* Limit % 8 == 0 =>. all data are equal.*/
- ands limit, limit, #7
- b.eq .Lret0
-
-.Ltiny8proc:
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- subs limit, limit, #1
-
- ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
- b.eq .Ltiny8proc
- sub result, data1, data2
- ret
-.Lret0:
- mov result, #0
- ret
SYM_FUNC_END_PI(memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index dc8d2a216a6e..b82fd64ee1e1 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -1,66 +1,252 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2012-2021, Arm Limited.
*
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/memcpy.S
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-#include <asm/cache.h>
-/*
- * Copy a buffer from src to dest (alignment handled by the hardware)
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
*
- * Parameters:
- * x0 - dest
- * x1 - src
- * x2 - n
- * Returns:
- * x0 - dest
*/
- .macro ldrb1 reg, ptr, val
- ldrb \reg, [\ptr], \val
- .endm
-
- .macro strb1 reg, ptr, val
- strb \reg, [\ptr], \val
- .endm
- .macro ldrh1 reg, ptr, val
- ldrh \reg, [\ptr], \val
- .endm
+#define L(label) .L ## label
- .macro strh1 reg, ptr, val
- strh \reg, [\ptr], \val
- .endm
+#define dstin x0
+#define src x1
+#define count x2
+#define dst x3
+#define srcend x4
+#define dstend x5
+#define A_l x6
+#define A_lw w6
+#define A_h x7
+#define B_l x8
+#define B_lw w8
+#define B_h x9
+#define C_l x10
+#define C_lw w10
+#define C_h x11
+#define D_l x12
+#define D_h x13
+#define E_l x14
+#define E_h x15
+#define F_l x16
+#define F_h x17
+#define G_l count
+#define G_h dst
+#define H_l src
+#define H_h srcend
+#define tmp1 x14
- .macro ldr1 reg, ptr, val
- ldr \reg, [\ptr], \val
- .endm
+/* This implementation handles overlaps and supports both memcpy and memmove
+ from a single entry point. It uses unaligned accesses and branchless
+ sequences to keep the code small, simple and improve performance.
- .macro str1 reg, ptr, val
- str \reg, [\ptr], \val
- .endm
+ Copies are split into 3 main cases: small copies of up to 32 bytes, medium
+ copies of up to 128 bytes, and large copies. The overhead of the overlap
+ check is negligible since it is only required for large copies.
- .macro ldp1 reg1, reg2, ptr, val
- ldp \reg1, \reg2, [\ptr], \val
- .endm
-
- .macro stp1 reg1, reg2, ptr, val
- stp \reg1, \reg2, [\ptr], \val
- .endm
+ Large copies use a software pipelined loop processing 64 bytes per iteration.
+ The destination pointer is 16-byte aligned to minimize unaligned accesses.
+ The loop tail is handled by always copying 64 bytes from the end.
+*/
+SYM_FUNC_START_ALIAS(__memmove)
+SYM_FUNC_START_WEAK_ALIAS_PI(memmove)
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_WEAK_PI(memcpy)
-#include "copy_template.S"
+ add srcend, src, count
+ add dstend, dstin, count
+ cmp count, 128
+ b.hi L(copy_long)
+ cmp count, 32
+ b.hi L(copy32_128)
+
+ /* Small copies: 0..32 bytes. */
+ cmp count, 16
+ b.lo L(copy16)
+ ldp A_l, A_h, [src]
+ ldp D_l, D_h, [srcend, -16]
+ stp A_l, A_h, [dstin]
+ stp D_l, D_h, [dstend, -16]
+ ret
+
+ /* Copy 8-15 bytes. */
+L(copy16):
+ tbz count, 3, L(copy8)
+ ldr A_l, [src]
+ ldr A_h, [srcend, -8]
+ str A_l, [dstin]
+ str A_h, [dstend, -8]
+ ret
+
+ .p2align 3
+ /* Copy 4-7 bytes. */
+L(copy8):
+ tbz count, 2, L(copy4)
+ ldr A_lw, [src]
+ ldr B_lw, [srcend, -4]
+ str A_lw, [dstin]
+ str B_lw, [dstend, -4]
+ ret
+
+ /* Copy 0..3 bytes using a branchless sequence. */
+L(copy4):
+ cbz count, L(copy0)
+ lsr tmp1, count, 1
+ ldrb A_lw, [src]
+ ldrb C_lw, [srcend, -1]
+ ldrb B_lw, [src, tmp1]
+ strb A_lw, [dstin]
+ strb B_lw, [dstin, tmp1]
+ strb C_lw, [dstend, -1]
+L(copy0):
+ ret
+
+ .p2align 4
+ /* Medium copies: 33..128 bytes. */
+L(copy32_128):
+ ldp A_l, A_h, [src]
+ ldp B_l, B_h, [src, 16]
+ ldp C_l, C_h, [srcend, -32]
+ ldp D_l, D_h, [srcend, -16]
+ cmp count, 64
+ b.hi L(copy128)
+ stp A_l, A_h, [dstin]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstend, -32]
+ stp D_l, D_h, [dstend, -16]
ret
+
+ .p2align 4
+ /* Copy 65..128 bytes. */
+L(copy128):
+ ldp E_l, E_h, [src, 32]
+ ldp F_l, F_h, [src, 48]
+ cmp count, 96
+ b.ls L(copy96)
+ ldp G_l, G_h, [srcend, -64]
+ ldp H_l, H_h, [srcend, -48]
+ stp G_l, G_h, [dstend, -64]
+ stp H_l, H_h, [dstend, -48]
+L(copy96):
+ stp A_l, A_h, [dstin]
+ stp B_l, B_h, [dstin, 16]
+ stp E_l, E_h, [dstin, 32]
+ stp F_l, F_h, [dstin, 48]
+ stp C_l, C_h, [dstend, -32]
+ stp D_l, D_h, [dstend, -16]
+ ret
+
+ .p2align 4
+ /* Copy more than 128 bytes. */
+L(copy_long):
+ /* Use backwards copy if there is an overlap. */
+ sub tmp1, dstin, src
+ cbz tmp1, L(copy0)
+ cmp tmp1, count
+ b.lo L(copy_long_backwards)
+
+ /* Copy 16 bytes and then align dst to 16-byte alignment. */
+
+ ldp D_l, D_h, [src]
+ and tmp1, dstin, 15
+ bic dst, dstin, 15
+ sub src, src, tmp1
+ add count, count, tmp1 /* Count is now 16 too large. */
+ ldp A_l, A_h, [src, 16]
+ stp D_l, D_h, [dstin]
+ ldp B_l, B_h, [src, 32]
+ ldp C_l, C_h, [src, 48]
+ ldp D_l, D_h, [src, 64]!
+ subs count, count, 128 + 16 /* Test and readjust count. */
+ b.ls L(copy64_from_end)
+
+L(loop64):
+ stp A_l, A_h, [dst, 16]
+ ldp A_l, A_h, [src, 16]
+ stp B_l, B_h, [dst, 32]
+ ldp B_l, B_h, [src, 32]
+ stp C_l, C_h, [dst, 48]
+ ldp C_l, C_h, [src, 48]
+ stp D_l, D_h, [dst, 64]!
+ ldp D_l, D_h, [src, 64]!
+ subs count, count, 64
+ b.hi L(loop64)
+
+ /* Write the last iteration and copy 64 bytes from the end. */
+L(copy64_from_end):
+ ldp E_l, E_h, [srcend, -64]
+ stp A_l, A_h, [dst, 16]
+ ldp A_l, A_h, [srcend, -48]
+ stp B_l, B_h, [dst, 32]
+ ldp B_l, B_h, [srcend, -32]
+ stp C_l, C_h, [dst, 48]
+ ldp C_l, C_h, [srcend, -16]
+ stp D_l, D_h, [dst, 64]
+ stp E_l, E_h, [dstend, -64]
+ stp A_l, A_h, [dstend, -48]
+ stp B_l, B_h, [dstend, -32]
+ stp C_l, C_h, [dstend, -16]
+ ret
+
+ .p2align 4
+
+ /* Large backwards copy for overlapping copies.
+ Copy 16 bytes and then align dst to 16-byte alignment. */
+L(copy_long_backwards):
+ ldp D_l, D_h, [srcend, -16]
+ and tmp1, dstend, 15
+ sub srcend, srcend, tmp1
+ sub count, count, tmp1
+ ldp A_l, A_h, [srcend, -16]
+ stp D_l, D_h, [dstend, -16]
+ ldp B_l, B_h, [srcend, -32]
+ ldp C_l, C_h, [srcend, -48]
+ ldp D_l, D_h, [srcend, -64]!
+ sub dstend, dstend, tmp1
+ subs count, count, 128
+ b.ls L(copy64_from_start)
+
+L(loop64_backwards):
+ stp A_l, A_h, [dstend, -16]
+ ldp A_l, A_h, [srcend, -16]
+ stp B_l, B_h, [dstend, -32]
+ ldp B_l, B_h, [srcend, -32]
+ stp C_l, C_h, [dstend, -48]
+ ldp C_l, C_h, [srcend, -48]
+ stp D_l, D_h, [dstend, -64]!
+ ldp D_l, D_h, [srcend, -64]!
+ subs count, count, 64
+ b.hi L(loop64_backwards)
+
+ /* Write the last iteration and copy 64 bytes from the start. */
+L(copy64_from_start):
+ ldp G_l, G_h, [src, 48]
+ stp A_l, A_h, [dstend, -16]
+ ldp A_l, A_h, [src, 32]
+ stp B_l, B_h, [dstend, -32]
+ ldp B_l, B_h, [src, 16]
+ stp C_l, C_h, [dstend, -48]
+ ldp C_l, C_h, [src]
+ stp D_l, D_h, [dstend, -64]
+ stp G_l, G_h, [dstin, 48]
+ stp A_l, A_h, [dstin, 32]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstin]
+ ret
+
SYM_FUNC_END_PI(memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(__memcpy)
+SYM_FUNC_END_ALIAS_PI(memmove)
+EXPORT_SYMBOL(memmove)
+SYM_FUNC_END_ALIAS(__memmove)
+EXPORT_SYMBOL(__memmove)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
deleted file mode 100644
index 1035dce4bdaf..000000000000
--- a/arch/arm64/lib/memmove.S
+++ /dev/null
@@ -1,189 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
- *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/cache.h>
-
-/*
- * Move a buffer from src to test (alignment handled by the hardware).
- * If dest <= src, call memcpy, otherwise copy in reverse order.
- *
- * Parameters:
- * x0 - dest
- * x1 - src
- * x2 - n
- * Returns:
- * x0 - dest
- */
-dstin .req x0
-src .req x1
-count .req x2
-tmp1 .req x3
-tmp1w .req w3
-tmp2 .req x4
-tmp2w .req w4
-tmp3 .req x5
-tmp3w .req w5
-dst .req x6
-
-A_l .req x7
-A_h .req x8
-B_l .req x9
-B_h .req x10
-C_l .req x11
-C_h .req x12
-D_l .req x13
-D_h .req x14
-
-SYM_FUNC_START_ALIAS(__memmove)
-SYM_FUNC_START_WEAK_PI(memmove)
- cmp dstin, src
- b.lo __memcpy
- add tmp1, src, count
- cmp dstin, tmp1
- b.hs __memcpy /* No overlap. */
-
- add dst, dstin, count
- add src, src, count
- cmp count, #16
- b.lo .Ltail15 /*probably non-alignment accesses.*/
-
- ands tmp2, src, #15 /* Bytes to reach alignment. */
- b.eq .LSrcAligned
- sub count, count, tmp2
- /*
- * process the aligned offset length to make the src aligned firstly.
- * those extra instructions' cost is acceptable. It also make the
- * coming accesses are based on aligned address.
- */
- tbz tmp2, #0, 1f
- ldrb tmp1w, [src, #-1]!
- strb tmp1w, [dst, #-1]!
-1:
- tbz tmp2, #1, 2f
- ldrh tmp1w, [src, #-2]!
- strh tmp1w, [dst, #-2]!
-2:
- tbz tmp2, #2, 3f
- ldr tmp1w, [src, #-4]!
- str tmp1w, [dst, #-4]!
-3:
- tbz tmp2, #3, .LSrcAligned
- ldr tmp1, [src, #-8]!
- str tmp1, [dst, #-8]!
-
-.LSrcAligned:
- cmp count, #64
- b.ge .Lcpy_over64
-
- /*
- * Deal with small copies quickly by dropping straight into the
- * exit block.
- */
-.Ltail63:
- /*
- * Copy up to 48 bytes of data. At this point we only need the
- * bottom 6 bits of count to be accurate.
- */
- ands tmp1, count, #0x30
- b.eq .Ltail15
- cmp tmp1w, #0x20
- b.eq 1f
- b.lt 2f
- ldp A_l, A_h, [src, #-16]!
- stp A_l, A_h, [dst, #-16]!
-1:
- ldp A_l, A_h, [src, #-16]!
- stp A_l, A_h, [dst, #-16]!
-2:
- ldp A_l, A_h, [src, #-16]!
- stp A_l, A_h, [dst, #-16]!
-
-.Ltail15:
- tbz count, #3, 1f
- ldr tmp1, [src, #-8]!
- str tmp1, [dst, #-8]!
-1:
- tbz count, #2, 2f
- ldr tmp1w, [src, #-4]!
- str tmp1w, [dst, #-4]!
-2:
- tbz count, #1, 3f
- ldrh tmp1w, [src, #-2]!
- strh tmp1w, [dst, #-2]!
-3:
- tbz count, #0, .Lexitfunc
- ldrb tmp1w, [src, #-1]
- strb tmp1w, [dst, #-1]
-
-.Lexitfunc:
- ret
-
-.Lcpy_over64:
- subs count, count, #128
- b.ge .Lcpy_body_large
- /*
- * Less than 128 bytes to copy, so handle 64 bytes here and then jump
- * to the tail.
- */
- ldp A_l, A_h, [src, #-16]
- stp A_l, A_h, [dst, #-16]
- ldp B_l, B_h, [src, #-32]
- ldp C_l, C_h, [src, #-48]
- stp B_l, B_h, [dst, #-32]
- stp C_l, C_h, [dst, #-48]
- ldp D_l, D_h, [src, #-64]!
- stp D_l, D_h, [dst, #-64]!
-
- tst count, #0x3f
- b.ne .Ltail63
- ret
-
- /*
- * Critical loop. Start at a new cache line boundary. Assuming
- * 64 bytes per line this ensures the entire loop is in one line.
- */
- .p2align L1_CACHE_SHIFT
-.Lcpy_body_large:
- /* pre-load 64 bytes data. */
- ldp A_l, A_h, [src, #-16]
- ldp B_l, B_h, [src, #-32]
- ldp C_l, C_h, [src, #-48]
- ldp D_l, D_h, [src, #-64]!
-1:
- /*
- * interlace the load of next 64 bytes data block with store of the last
- * loaded 64 bytes data.
- */
- stp A_l, A_h, [dst, #-16]
- ldp A_l, A_h, [src, #-16]
- stp B_l, B_h, [dst, #-32]
- ldp B_l, B_h, [src, #-32]
- stp C_l, C_h, [dst, #-48]
- ldp C_l, C_h, [src, #-48]
- stp D_l, D_h, [dst, #-64]!
- ldp D_l, D_h, [src, #-64]!
- subs count, count, #64
- b.ge 1b
- stp A_l, A_h, [dst, #-16]
- stp B_l, B_h, [dst, #-32]
- stp C_l, C_h, [dst, #-48]
- stp D_l, D_h, [dst, #-64]!
-
- tst count, #0x3f
- b.ne .Ltail63
- ret
-SYM_FUNC_END_PI(memmove)
-EXPORT_SYMBOL(memmove)
-SYM_FUNC_END_ALIAS(__memmove)
-EXPORT_SYMBOL(__memmove)
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 351537c12f36..e83643b3995f 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -37,6 +37,26 @@ SYM_FUNC_START(mte_clear_page_tags)
SYM_FUNC_END(mte_clear_page_tags)
/*
+ * Zero the page and tags at the same time
+ *
+ * Parameters:
+ * x0 - address to the beginning of the page
+ */
+SYM_FUNC_START(mte_zero_clear_page_tags)
+ mrs x1, dczid_el0
+ and w1, w1, #0xf
+ mov x2, #4
+ lsl x1, x2, x1
+ and x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag
+
+1: dc gzva, x0
+ add x0, x0, x1
+ tst x0, #(PAGE_SIZE - 1)
+ b.ne 1b
+ ret
+SYM_FUNC_END(mte_zero_clear_page_tags)
+
+/*
* Copy the tags from the source page to the destination one
* x0 - address of the destination page
* x1 - address of the source page
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 4e79566726c8..d7bee210a798 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -1,84 +1,123 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2012-2021, Arm Limited.
*
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-/*
- * compare two strings
+/* Assumptions:
*
- * Parameters:
- * x0 - const string 1 pointer
- * x1 - const string 2 pointer
- * Returns:
- * x0 - an integer less than, equal to, or greater than zero
- * if s1 is found, respectively, to be less than, to match,
- * or be greater than s2.
+ * ARMv8-a, AArch64
*/
+#define L(label) .L ## label
+
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
/* Parameters and result. */
-src1 .req x0
-src2 .req x1
-result .req x0
+#define src1 x0
+#define src2 x1
+#define result x0
/* Internal variables. */
-data1 .req x2
-data1w .req w2
-data2 .req x3
-data2w .req w3
-has_nul .req x4
-diff .req x5
-syndrome .req x6
-tmp1 .req x7
-tmp2 .req x8
-tmp3 .req x9
-zeroones .req x10
-pos .req x11
-
+#define data1 x2
+#define data1w w2
+#define data2 x3
+#define data2w w3
+#define has_nul x4
+#define diff x5
+#define syndrome x6
+#define tmp1 x7
+#define tmp2 x8
+#define tmp3 x9
+#define zeroones x10
+#define pos x11
+
+ /* Start of performance-critical section -- one 64B cache line. */
+ .align 6
SYM_FUNC_START_WEAK_PI(strcmp)
eor tmp1, src1, src2
mov zeroones, #REP8_01
tst tmp1, #7
- b.ne .Lmisaligned8
+ b.ne L(misaligned8)
ands tmp1, src1, #7
- b.ne .Lmutual_align
-
- /*
- * NUL detection works on the principle that (X - 1) & (~X) & 0x80
- * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- * can be done in parallel across the entire word.
- */
-.Lloop_aligned:
+ b.ne L(mutual_align)
+ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ can be done in parallel across the entire word. */
+L(loop_aligned):
ldr data1, [src1], #8
ldr data2, [src2], #8
-.Lstart_realigned:
+L(start_realigned):
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
eor diff, data1, data2 /* Non-zero if differences found. */
bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
orr syndrome, diff, has_nul
- cbz syndrome, .Lloop_aligned
- b .Lcal_cmpresult
+ cbz syndrome, L(loop_aligned)
+ /* End of performance-critical section -- one 64B cache line. */
+
+L(end):
+#ifndef __AARCH64EB__
+ rev syndrome, syndrome
+ rev data1, data1
+ /* The MS-non-zero bit of the syndrome marks either the first bit
+ that is different, or the top bit of the first zero byte.
+ Shifting left now will bring the critical information into the
+ top bits. */
+ clz pos, syndrome
+ rev data2, data2
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+#else
+ /* For big-endian we cannot use the trick with the syndrome value
+ as carry-propagation can corrupt the upper bits if the trailing
+ bytes in the string contain 0x01. */
+ /* However, if there is no NUL byte in the dword, we can generate
+ the result directly. We can't just subtract the bytes as the
+ MSB might be significant. */
+ cbnz has_nul, 1f
+ cmp data1, data2
+ cset result, ne
+ cneg result, result, lo
+ ret
+1:
+ /* Re-compute the NUL-byte detection, using a byte-reversed value. */
+ rev tmp3, data1
+ sub tmp1, tmp3, zeroones
+ orr tmp2, tmp3, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ rev has_nul, has_nul
+ orr syndrome, diff, has_nul
+ clz pos, syndrome
+ /* The MS-non-zero bit of the syndrome marks either the first bit
+ that is different, or the top bit of the first zero byte.
+ Shifting left now will bring the critical information into the
+ top bits. */
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+#endif
-.Lmutual_align:
- /*
- * Sources are mutually aligned, but are not currently at an
- * alignment boundary. Round down the addresses and then mask off
- * the bytes that preceed the start point.
- */
+L(mutual_align):
+ /* Sources are mutually aligned, but are not currently at an
+ alignment boundary. Round down the addresses and then mask off
+ the bytes that preceed the start point. */
bic src1, src1, #7
bic src2, src2, #7
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
@@ -86,138 +125,52 @@ SYM_FUNC_START_WEAK_PI(strcmp)
neg tmp1, tmp1 /* Bits to alignment -64. */
ldr data2, [src2], #8
mov tmp2, #~0
+#ifdef __AARCH64EB__
/* Big-endian. Early bytes are at MSB. */
-CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+ lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+#else
/* Little-endian. Early bytes are at LSB. */
-CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
-
+ lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+#endif
orr data1, data1, tmp2
orr data2, data2, tmp2
- b .Lstart_realigned
-
-.Lmisaligned8:
- /*
- * Get the align offset length to compare per byte first.
- * After this process, one string's address will be aligned.
- */
- and tmp1, src1, #7
- neg tmp1, tmp1
- add tmp1, tmp1, #8
- and tmp2, src2, #7
- neg tmp2, tmp2
- add tmp2, tmp2, #8
- subs tmp3, tmp1, tmp2
- csel pos, tmp1, tmp2, hi /*Choose the maximum. */
-.Ltinycmp:
+ b L(start_realigned)
+
+L(misaligned8):
+ /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
+ checking to make sure that we don't access beyond page boundary in
+ SRC2. */
+ tst src1, #7
+ b.eq L(loop_misaligned)
+L(do_misaligned):
ldrb data1w, [src1], #1
ldrb data2w, [src2], #1
- subs pos, pos, #1
- ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq .Ltinycmp
- cbnz pos, 1f /*find the null or unequal...*/
cmp data1w, #1
- ccmp data1w, data2w, #0, cs
- b.eq .Lstart_align /*the last bytes are equal....*/
-1:
- sub result, data1, data2
- ret
-
-.Lstart_align:
- ands xzr, src1, #7
- b.eq .Lrecal_offset
- /*process more leading bytes to make str1 aligned...*/
- add src1, src1, tmp3
- add src2, src2, tmp3
- /*load 8 bytes from aligned str1 and non-aligned str2..*/
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.ne L(done)
+ tst src1, #7
+ b.ne L(do_misaligned)
+
+L(loop_misaligned):
+ /* Test if we are within the last dword of the end of a 4K page. If
+ yes then jump back to the misaligned loop to copy a byte at a time. */
+ and tmp1, src2, #0xff8
+ eor tmp1, tmp1, #0xff8
+ cbz tmp1, L(do_misaligned)
ldr data1, [src1], #8
ldr data2, [src2], #8
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
- bic has_nul, tmp1, tmp2
- eor diff, data1, data2 /* Non-zero if differences found. */
- orr syndrome, diff, has_nul
- cbnz syndrome, .Lcal_cmpresult
- /*How far is the current str2 from the alignment boundary...*/
- and tmp3, tmp3, #7
-.Lrecal_offset:
- neg pos, tmp3
-.Lloopcmp_proc:
- /*
- * Divide the eight bytes into two parts. First,backwards the src2
- * to an alignment boundary,load eight bytes from the SRC2 alignment
- * boundary,then compare with the relative bytes from SRC1.
- * If all 8 bytes are equal,then start the second part's comparison.
- * Otherwise finish the comparison.
- * This special handle can garantee all the accesses are in the
- * thread/task space in avoid to overrange access.
- */
- ldr data1, [src1,pos]
- ldr data2, [src2,pos]
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bic has_nul, tmp1, tmp2
- eor diff, data1, data2 /* Non-zero if differences found. */
- orr syndrome, diff, has_nul
- cbnz syndrome, .Lcal_cmpresult
-
- /*The second part process*/
- ldr data1, [src1], #8
- ldr data2, [src2], #8
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bic has_nul, tmp1, tmp2
- eor diff, data1, data2 /* Non-zero if differences found. */
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
orr syndrome, diff, has_nul
- cbz syndrome, .Lloopcmp_proc
+ cbz syndrome, L(loop_misaligned)
+ b L(end)
-.Lcal_cmpresult:
- /*
- * reversed the byte-order as big-endian,then CLZ can find the most
- * significant zero bits.
- */
-CPU_LE( rev syndrome, syndrome )
-CPU_LE( rev data1, data1 )
-CPU_LE( rev data2, data2 )
-
- /*
- * For big-endian we cannot use the trick with the syndrome value
- * as carry-propagation can corrupt the upper bits if the trailing
- * bytes in the string contain 0x01.
- * However, if there is no NUL byte in the dword, we can generate
- * the result directly. We cannot just subtract the bytes as the
- * MSB might be significant.
- */
-CPU_BE( cbnz has_nul, 1f )
-CPU_BE( cmp data1, data2 )
-CPU_BE( cset result, ne )
-CPU_BE( cneg result, result, lo )
-CPU_BE( ret )
-CPU_BE( 1: )
- /*Re-compute the NUL-byte detection, using a byte-reversed value. */
-CPU_BE( rev tmp3, data1 )
-CPU_BE( sub tmp1, tmp3, zeroones )
-CPU_BE( orr tmp2, tmp3, #REP8_7f )
-CPU_BE( bic has_nul, tmp1, tmp2 )
-CPU_BE( rev has_nul, has_nul )
-CPU_BE( orr syndrome, diff, has_nul )
-
- clz pos, syndrome
- /*
- * The MS-non-zero bit of the syndrome marks either the first bit
- * that is different, or the top bit of the first zero byte.
- * Shifting left now will bring the critical information into the
- * top bits.
- */
- lsl data1, data1, pos
- lsl data2, data2, pos
- /*
- * But we need to zero-extend (char is unsigned) the value and then
- * perform a signed 32-bit subtraction.
- */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
+L(done):
+ sub result, data1, data2
ret
+
SYM_FUNC_END_PI(strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index ee3ed882dd79..35fbdb7d6e1a 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -1,115 +1,203 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013-2021, Arm Limited.
*
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/98e4d6a5c13c8e54/string/aarch64/strlen.S
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-/*
- * calculate the length of a string
+/* Assumptions:
*
- * Parameters:
- * x0 - const string pointer
- * Returns:
- * x0 - the return length of specific string
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
*/
+#define L(label) .L ## label
+
/* Arguments and results. */
-srcin .req x0
-len .req x0
+#define srcin x0
+#define len x0
/* Locals and temporaries. */
-src .req x1
-data1 .req x2
-data2 .req x3
-data2a .req x4
-has_nul1 .req x5
-has_nul2 .req x6
-tmp1 .req x7
-tmp2 .req x8
-tmp3 .req x9
-tmp4 .req x10
-zeroones .req x11
-pos .req x12
+#define src x1
+#define data1 x2
+#define data2 x3
+#define has_nul1 x4
+#define has_nul2 x5
+#define tmp1 x4
+#define tmp2 x5
+#define tmp3 x6
+#define tmp4 x7
+#define zeroones x8
+
+ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ can be done in parallel across the entire word. A faster check
+ (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
+ false hits for characters 129..255. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
+#define MIN_PAGE_SIZE 4096
+
+ /* Since strings are short on average, we check the first 16 bytes
+ of the string for a NUL character. In order to do an unaligned ldp
+ safely we have to do a page cross check first. If there is a NUL
+ byte we calculate the length from the 2 8-byte words using
+ conditional select to reduce branch mispredictions (it is unlikely
+ strlen will be repeatedly called on strings with the same length).
+
+ If the string is longer than 16 bytes, we align src so don't need
+ further page cross checks, and process 32 bytes per iteration
+ using the fast NUL check. If we encounter non-ASCII characters,
+ fallback to a second loop using the full NUL check.
+
+ If the page cross check fails, we read 16 bytes from an aligned
+ address, remove any characters before the string, and continue
+ in the main loop using aligned loads. Since strings crossing a
+ page in the first 16 bytes are rare (probability of
+ 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
+
+ AArch64 systems have a minimum page size of 4k. We don't bother
+ checking for larger page sizes - the cost of setting up the correct
+ page size is just not worth the extra gain from a small reduction in
+ the cases taking the slow path. Note that we only care about
+ whether the first fetch, which may be misaligned, crosses a page
+ boundary. */
+
SYM_FUNC_START_WEAK_PI(strlen)
- mov zeroones, #REP8_01
- bic src, srcin, #15
- ands tmp1, srcin, #15
- b.ne .Lmisaligned
- /*
- * NUL detection works on the principle that (X - 1) & (~X) & 0x80
- * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- * can be done in parallel across the entire word.
- */
- /*
- * The inner loop deals with two Dwords at a time. This has a
- * slightly higher start-up cost, but we should win quite quickly,
- * especially on cores with a high number of issue slots per
- * cycle, as we get much better parallelism out of the operations.
- */
-.Lloop:
- ldp data1, data2, [src], #16
-.Lrealigned:
+ and tmp1, srcin, MIN_PAGE_SIZE - 1
+ mov zeroones, REP8_01
+ cmp tmp1, MIN_PAGE_SIZE - 16
+ b.gt L(page_cross)
+ ldp data1, data2, [srcin]
+#ifdef __AARCH64EB__
+ /* For big-endian, carry propagation (if the final byte in the
+ string is 0x01) means we cannot use has_nul1/2 directly.
+ Since we expect strings to be small and early-exit,
+ byte-swap the data now so has_null1/2 will be correct. */
+ rev data1, data1
+ rev data2, data2
+#endif
sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
+ orr tmp2, data1, REP8_7f
sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul1, tmp1, tmp2
- bics has_nul2, tmp3, tmp4
- ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
- b.eq .Lloop
+ orr tmp4, data2, REP8_7f
+ bics has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ ccmp has_nul2, 0, 0, eq
+ beq L(main_loop_entry)
+
+ /* Enter with C = has_nul1 == 0. */
+ csel has_nul1, has_nul1, has_nul2, cc
+ mov len, 8
+ rev has_nul1, has_nul1
+ clz tmp1, has_nul1
+ csel len, xzr, len, cc
+ add len, len, tmp1, lsr 3
+ ret
+ /* The inner loop processes 32 bytes per iteration and uses the fast
+ NUL check. If we encounter non-ASCII characters, use a second
+ loop with the accurate NUL check. */
+ .p2align 4
+L(main_loop_entry):
+ bic src, srcin, 15
+ sub src, src, 16
+L(main_loop):
+ ldp data1, data2, [src, 32]!
+L(page_cross_entry):
+ sub tmp1, data1, zeroones
+ sub tmp3, data2, zeroones
+ orr tmp2, tmp1, tmp3
+ tst tmp2, zeroones, lsl 7
+ bne 1f
+ ldp data1, data2, [src, 16]
+ sub tmp1, data1, zeroones
+ sub tmp3, data2, zeroones
+ orr tmp2, tmp1, tmp3
+ tst tmp2, zeroones, lsl 7
+ beq L(main_loop)
+ add src, src, 16
+1:
+ /* The fast check failed, so do the slower, accurate NUL check. */
+ orr tmp2, data1, REP8_7f
+ orr tmp4, data2, REP8_7f
+ bics has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ ccmp has_nul2, 0, 0, eq
+ beq L(nonascii_loop)
+
+ /* Enter with C = has_nul1 == 0. */
+L(tail):
+#ifdef __AARCH64EB__
+ /* For big-endian, carry propagation (if the final byte in the
+ string is 0x01) means we cannot use has_nul1/2 directly. The
+ easiest way to get the correct byte is to byte-swap the data
+ and calculate the syndrome a second time. */
+ csel data1, data1, data2, cc
+ rev data1, data1
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, REP8_7f
+ bic has_nul1, tmp1, tmp2
+#else
+ csel has_nul1, has_nul1, has_nul2, cc
+#endif
sub len, src, srcin
- cbz has_nul1, .Lnul_in_data2
-CPU_BE( mov data2, data1 ) /*prepare data to re-calculate the syndrome*/
- sub len, len, #8
- mov has_nul2, has_nul1
-.Lnul_in_data2:
- /*
- * For big-endian, carry propagation (if the final byte in the
- * string is 0x01) means we cannot use has_nul directly. The
- * easiest way to get the correct byte is to byte-swap the data
- * and calculate the syndrome a second time.
- */
-CPU_BE( rev data2, data2 )
-CPU_BE( sub tmp1, data2, zeroones )
-CPU_BE( orr tmp2, data2, #REP8_7f )
-CPU_BE( bic has_nul2, tmp1, tmp2 )
-
- sub len, len, #8
- rev has_nul2, has_nul2
- clz pos, has_nul2
- add len, len, pos, lsr #3 /* Bits to bytes. */
+ rev has_nul1, has_nul1
+ add tmp2, len, 8
+ clz tmp1, has_nul1
+ csel len, len, tmp2, cc
+ add len, len, tmp1, lsr 3
ret
-.Lmisaligned:
- cmp tmp1, #8
- neg tmp1, tmp1
- ldp data1, data2, [src], #16
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- mov tmp2, #~0
- /* Big-endian. Early bytes are at MSB. */
-CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+L(nonascii_loop):
+ ldp data1, data2, [src, 16]!
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, REP8_7f
+ bics has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ ccmp has_nul2, 0, 0, eq
+ bne L(tail)
+ ldp data1, data2, [src, 16]!
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, REP8_7f
+ sub tmp3, data2, zeroones
+ orr tmp4, data2, REP8_7f
+ bics has_nul1, tmp1, tmp2
+ bic has_nul2, tmp3, tmp4
+ ccmp has_nul2, 0, 0, eq
+ beq L(nonascii_loop)
+ b L(tail)
+
+ /* Load 16 bytes from [srcin & ~15] and force the bytes that precede
+ srcin to 0x7f, so we ignore any NUL bytes before the string.
+ Then continue in the aligned loop. */
+L(page_cross):
+ bic src, srcin, 15
+ ldp data1, data2, [src]
+ lsl tmp1, srcin, 3
+ mov tmp4, -1
+#ifdef __AARCH64EB__
+ /* Big-endian. Early bytes are at MSB. */
+ lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
+#else
/* Little-endian. Early bytes are at LSB. */
-CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
+ lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
+#endif
+ orr tmp1, tmp1, REP8_80
+ orn data1, data1, tmp1
+ orn tmp2, data2, tmp1
+ tst srcin, 8
+ csel data1, data1, tmp4, eq
+ csel data2, data2, tmp2, eq
+ b L(page_cross_entry)
- orr data1, data1, tmp2
- orr data2a, data2, tmp2
- csinv data1, data1, xzr, le
- csel data2, data2, data2a, le
- b .Lrealigned
SYM_FUNC_END_PI(strlen)
EXPORT_SYMBOL_NOKASAN(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index 2a7ee949ed47..48d44f7fddb1 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,299 +1,261 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013-2021, Arm Limited.
*
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
-/*
- * compare two strings
+/* Assumptions:
*
- * Parameters:
- * x0 - const string 1 pointer
- * x1 - const string 2 pointer
- * x2 - the maximal length to be compared
- * Returns:
- * x0 - an integer less than, equal to, or greater than zero if s1 is found,
- * respectively, to be less than, to match, or be greater than s2.
+ * ARMv8-a, AArch64
*/
+#define L(label) .L ## label
+
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
/* Parameters and result. */
-src1 .req x0
-src2 .req x1
-limit .req x2
-result .req x0
+#define src1 x0
+#define src2 x1
+#define limit x2
+#define result x0
/* Internal variables. */
-data1 .req x3
-data1w .req w3
-data2 .req x4
-data2w .req w4
-has_nul .req x5
-diff .req x6
-syndrome .req x7
-tmp1 .req x8
-tmp2 .req x9
-tmp3 .req x10
-zeroones .req x11
-pos .req x12
-limit_wd .req x13
-mask .req x14
-endloop .req x15
+#define data1 x3
+#define data1w w3
+#define data2 x4
+#define data2w w4
+#define has_nul x5
+#define diff x6
+#define syndrome x7
+#define tmp1 x8
+#define tmp2 x9
+#define tmp3 x10
+#define zeroones x11
+#define pos x12
+#define limit_wd x13
+#define mask x14
+#define endloop x15
+#define count mask
SYM_FUNC_START_WEAK_PI(strncmp)
- cbz limit, .Lret0
+ cbz limit, L(ret0)
eor tmp1, src1, src2
mov zeroones, #REP8_01
tst tmp1, #7
- b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
+ and count, src1, #7
+ b.ne L(misaligned8)
+ cbnz count, L(mutual_align)
/* Calculate the number of full and partial words -1. */
- /*
- * when limit is mulitply of 8, if not sub 1,
- * the judgement of last dword will wrong.
- */
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
+ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+ lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
- /*
- * NUL detection works on the principle that (X - 1) & (~X) & 0x80
- * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- * can be done in parallel across the entire word.
- */
-.Lloop_aligned:
+ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+ (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+ can be done in parallel across the entire word. */
+ .p2align 4
+L(loop_aligned):
ldr data1, [src1], #8
ldr data2, [src2], #8
-.Lstart_realigned:
+L(start_realigned):
subs limit_wd, limit_wd, #1
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, pl /* Last Dword or differences.*/
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, pl /* Last Dword or differences. */
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
ccmp endloop, #0, #0, eq
- b.eq .Lloop_aligned
+ b.eq L(loop_aligned)
+ /* End of main loop */
- /*Not reached the limit, must have found the end or a diff. */
- tbz limit_wd, #63, .Lnot_limit
+ /* Not reached the limit, must have found the end or a diff. */
+ tbz limit_wd, #63, L(not_limit)
/* Limit % 8 == 0 => all bytes significant. */
ands limit, limit, #7
- b.eq .Lnot_limit
+ b.eq L(not_limit)
- lsl limit, limit, #3 /* Bits -> bytes. */
+ lsl limit, limit, #3 /* Bits -> bytes. */
mov mask, #~0
-CPU_BE( lsr mask, mask, limit )
-CPU_LE( lsl mask, mask, limit )
+#ifdef __AARCH64EB__
+ lsr mask, mask, limit
+#else
+ lsl mask, mask, limit
+#endif
bic data1, data1, mask
bic data2, data2, mask
/* Make sure that the NUL byte is marked in the syndrome. */
orr has_nul, has_nul, mask
-.Lnot_limit:
+L(not_limit):
orr syndrome, diff, has_nul
- b .Lcal_cmpresult
-.Lmutual_align:
- /*
- * Sources are mutually aligned, but are not currently at an
- * alignment boundary. Round down the addresses and then mask off
- * the bytes that precede the start point.
- * We also need to adjust the limit calculations, but without
- * overflowing if the limit is near ULONG_MAX.
- */
+#ifndef __AARCH64EB__
+ rev syndrome, syndrome
+ rev data1, data1
+ /* The MS-non-zero bit of the syndrome marks either the first bit
+ that is different, or the top bit of the first zero byte.
+ Shifting left now will bring the critical information into the
+ top bits. */
+ clz pos, syndrome
+ rev data2, data2
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+#else
+ /* For big-endian we cannot use the trick with the syndrome value
+ as carry-propagation can corrupt the upper bits if the trailing
+ bytes in the string contain 0x01. */
+ /* However, if there is no NUL byte in the dword, we can generate
+ the result directly. We can't just subtract the bytes as the
+ MSB might be significant. */
+ cbnz has_nul, 1f
+ cmp data1, data2
+ cset result, ne
+ cneg result, result, lo
+ ret
+1:
+ /* Re-compute the NUL-byte detection, using a byte-reversed value. */
+ rev tmp3, data1
+ sub tmp1, tmp3, zeroones
+ orr tmp2, tmp3, #REP8_7f
+ bic has_nul, tmp1, tmp2
+ rev has_nul, has_nul
+ orr syndrome, diff, has_nul
+ clz pos, syndrome
+ /* The MS-non-zero bit of the syndrome marks either the first bit
+ that is different, or the top bit of the first zero byte.
+ Shifting left now will bring the critical information into the
+ top bits. */
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ ret
+#endif
+
+L(mutual_align):
+ /* Sources are mutually aligned, but are not currently at an
+ alignment boundary. Round down the addresses and then mask off
+ the bytes that precede the start point.
+ We also need to adjust the limit calculations, but without
+ overflowing if the limit is near ULONG_MAX. */
bic src1, src1, #7
bic src2, src2, #7
ldr data1, [src1], #8
- neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
+ neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
ldr data2, [src2], #8
mov tmp2, #~0
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
+#ifdef __AARCH64EB__
/* Big-endian. Early bytes are at MSB. */
-CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */
+ lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */
+#else
/* Little-endian. Early bytes are at LSB. */
-CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */
-
+ lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */
+#endif
and tmp3, limit_wd, #7
lsr limit_wd, limit_wd, #3
- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
- add limit, limit, tmp1
- add tmp3, tmp3, tmp1
+ /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
+ add limit, limit, count
+ add tmp3, tmp3, count
orr data1, data1, tmp2
orr data2, data2, tmp2
add limit_wd, limit_wd, tmp3, lsr #3
- b .Lstart_realigned
+ b L(start_realigned)
+
+ .p2align 4
+ /* Don't bother with dwords for up to 16 bytes. */
+L(misaligned8):
+ cmp limit, #16
+ b.hs L(try_misaligned_words)
-/*when src1 offset is not equal to src2 offset...*/
-.Lmisaligned8:
- cmp limit, #8
- b.lo .Ltiny8proc /*limit < 8... */
- /*
- * Get the align offset length to compare per byte first.
- * After this process, one string's address will be aligned.*/
- and tmp1, src1, #7
- neg tmp1, tmp1
- add tmp1, tmp1, #8
- and tmp2, src2, #7
- neg tmp2, tmp2
- add tmp2, tmp2, #8
- subs tmp3, tmp1, tmp2
- csel pos, tmp1, tmp2, hi /*Choose the maximum. */
- /*
- * Here, limit is not less than 8, so directly run .Ltinycmp
- * without checking the limit.*/
- sub limit, limit, pos
-.Ltinycmp:
+L(byte_loop):
+ /* Perhaps we can do better than this. */
ldrb data1w, [src1], #1
ldrb data2w, [src2], #1
- subs pos, pos, #1
- ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq .Ltinycmp
- cbnz pos, 1f /*find the null or unequal...*/
- cmp data1w, #1
- ccmp data1w, data2w, #0, cs
- b.eq .Lstart_align /*the last bytes are equal....*/
-1:
+ subs limit, limit, #1
+ ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq L(byte_loop)
+L(done):
sub result, data1, data2
ret
-
-.Lstart_align:
+ /* Align the SRC1 to a dword by doing a bytewise compare and then do
+ the dword loop. */
+L(try_misaligned_words):
lsr limit_wd, limit, #3
- cbz limit_wd, .Lremain8
- /*process more leading bytes to make str1 aligned...*/
- ands xzr, src1, #7
- b.eq .Lrecal_offset
- add src1, src1, tmp3 /*tmp3 is positive in this branch.*/
- add src2, src2, tmp3
- ldr data1, [src1], #8
- ldr data2, [src2], #8
+ cbz count, L(do_misaligned)
- sub limit, limit, tmp3
+ neg count, count
+ and count, count, #7
+ sub limit, limit, count
lsr limit_wd, limit, #3
- subs limit_wd, limit_wd, #1
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
- bics has_nul, tmp1, tmp2
- ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
- b.ne .Lunequal_proc
- /*How far is the current str2 from the alignment boundary...*/
- and tmp3, tmp3, #7
-.Lrecal_offset:
- neg pos, tmp3
-.Lloopcmp_proc:
- /*
- * Divide the eight bytes into two parts. First,backwards the src2
- * to an alignment boundary,load eight bytes from the SRC2 alignment
- * boundary,then compare with the relative bytes from SRC1.
- * If all 8 bytes are equal,then start the second part's comparison.
- * Otherwise finish the comparison.
- * This special handle can garantee all the accesses are in the
- * thread/task space in avoid to overrange access.
- */
- ldr data1, [src1,pos]
- ldr data2, [src2,pos]
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, eq
- cbnz endloop, .Lunequal_proc
+L(page_end_loop):
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ cmp data1w, #1
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.ne L(done)
+ subs count, count, #1
+ b.hi L(page_end_loop)
+
+L(do_misaligned):
+ /* Prepare ourselves for the next page crossing. Unlike the aligned
+ loop, we fetch 1 less dword because we risk crossing bounds on
+ SRC2. */
+ mov count, #8
+ subs limit_wd, limit_wd, #1
+ b.lo L(done_loop)
+L(loop_misaligned):
+ and tmp2, src2, #0xff8
+ eor tmp2, tmp2, #0xff8
+ cbz tmp2, L(page_end_loop)
- /*The second part process*/
ldr data1, [src1], #8
ldr data2, [src2], #8
- subs limit_wd, limit_wd, #1
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
- bics has_nul, tmp1, tmp2
- ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
- b.eq .Lloopcmp_proc
-
-.Lunequal_proc:
- orr syndrome, diff, has_nul
- cbz syndrome, .Lremain8
-.Lcal_cmpresult:
- /*
- * reversed the byte-order as big-endian,then CLZ can find the most
- * significant zero bits.
- */
-CPU_LE( rev syndrome, syndrome )
-CPU_LE( rev data1, data1 )
-CPU_LE( rev data2, data2 )
- /*
- * For big-endian we cannot use the trick with the syndrome value
- * as carry-propagation can corrupt the upper bits if the trailing
- * bytes in the string contain 0x01.
- * However, if there is no NUL byte in the dword, we can generate
- * the result directly. We can't just subtract the bytes as the
- * MSB might be significant.
- */
-CPU_BE( cbnz has_nul, 1f )
-CPU_BE( cmp data1, data2 )
-CPU_BE( cset result, ne )
-CPU_BE( cneg result, result, lo )
-CPU_BE( ret )
-CPU_BE( 1: )
- /* Re-compute the NUL-byte detection, using a byte-reversed value.*/
-CPU_BE( rev tmp3, data1 )
-CPU_BE( sub tmp1, tmp3, zeroones )
-CPU_BE( orr tmp2, tmp3, #REP8_7f )
-CPU_BE( bic has_nul, tmp1, tmp2 )
-CPU_BE( rev has_nul, has_nul )
-CPU_BE( orr syndrome, diff, has_nul )
- /*
- * The MS-non-zero bit of the syndrome marks either the first bit
- * that is different, or the top bit of the first zero byte.
- * Shifting left now will bring the critical information into the
- * top bits.
- */
- clz pos, syndrome
- lsl data1, data1, pos
- lsl data2, data2, pos
- /*
- * But we need to zero-extend (char is unsigned) the value and then
- * perform a signed 32-bit subtraction.
- */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-
-.Lremain8:
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq .Lret0
-.Ltiny8proc:
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- subs limit, limit, #1
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ ccmp diff, #0, #0, eq
+ b.ne L(not_limit)
+ subs limit_wd, limit_wd, #1
+ b.pl L(loop_misaligned)
- ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq .Ltiny8proc
- sub result, data1, data2
- ret
+L(done_loop):
+ /* We found a difference or a NULL before the limit was reached. */
+ and limit, limit, #7
+ cbz limit, L(not_limit)
+ /* Read the last word. */
+ sub src1, src1, 8
+ sub src2, src2, 8
+ ldr data1, [src1, limit]
+ ldr data2, [src2, limit]
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ ccmp diff, #0, #0, eq
+ b.ne L(not_limit)
-.Lret0:
+L(ret0):
mov result, #0
ret
+
SYM_FUNC_END_PI(strncmp)
EXPORT_SYMBOL_NOKASAN(strncmp)
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
index c83bb5a4aad2..baee22961bdb 100644
--- a/arch/arm64/lib/uaccess_flushcache.c
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt)
* barrier to order the cache maintenance against the memcpy.
*/
memcpy(dst, src, cnt);
- __clean_dcache_area_pop(dst, cnt);
+ dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt);
}
EXPORT_SYMBOL_GPL(memcpy_flushcache);
@@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
rc = raw_copy_from_user(to, from, n);
/* See above */
- __clean_dcache_area_pop(to, n - rc);
+ dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc);
return rc;
}
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 2d881f34dd9d..5051b3c1a4f1 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -15,7 +15,7 @@
#include <asm/asm-uaccess.h>
/*
- * flush_icache_range(start,end)
+ * caches_clean_inval_pou_macro(start,end) [fixup]
*
* Ensure that the I and D caches are coherent within specified region.
* This is typically used when code has been written to a memory region,
@@ -23,12 +23,27 @@
*
* - start - virtual start address of region
* - end - virtual end address of region
+ * - fixup - optional label to branch to on user fault
*/
-SYM_FUNC_START(__flush_icache_range)
- /* FALLTHROUGH */
+.macro caches_clean_inval_pou_macro, fixup
+alternative_if ARM64_HAS_CACHE_IDC
+ dsb ishst
+ b .Ldc_skip_\@
+alternative_else_nop_endif
+ mov x2, x0
+ mov x3, x1
+ dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup
+.Ldc_skip_\@:
+alternative_if ARM64_HAS_CACHE_DIC
+ isb
+ b .Lic_skip_\@
+alternative_else_nop_endif
+ invalidate_icache_by_line x0, x1, x2, x3, \fixup
+.Lic_skip_\@:
+.endm
/*
- * __flush_cache_user_range(start,end)
+ * caches_clean_inval_pou(start,end)
*
* Ensure that the I and D caches are coherent within specified region.
* This is typically used when code has been written to a memory region,
@@ -37,117 +52,103 @@ SYM_FUNC_START(__flush_icache_range)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-SYM_FUNC_START(__flush_cache_user_range)
+SYM_FUNC_START(caches_clean_inval_pou)
+ caches_clean_inval_pou_macro
+ ret
+SYM_FUNC_END(caches_clean_inval_pou)
+
+/*
+ * caches_clean_inval_user_pou(start,end)
+ *
+ * Ensure that the I and D caches are coherent within specified region.
+ * This is typically used when code has been written to a memory region,
+ * and will be executed.
+ *
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+SYM_FUNC_START(caches_clean_inval_user_pou)
uaccess_ttbr0_enable x2, x3, x4
-alternative_if ARM64_HAS_CACHE_IDC
- dsb ishst
- b 7f
-alternative_else_nop_endif
- dcache_line_size x2, x3
- sub x3, x2, #1
- bic x4, x0, x3
-1:
-user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
- add x4, x4, x2
- cmp x4, x1
- b.lo 1b
- dsb ish
-7:
-alternative_if ARM64_HAS_CACHE_DIC
- isb
- b 8f
-alternative_else_nop_endif
- invalidate_icache_by_line x0, x1, x2, x3, 9f
-8: mov x0, #0
+ caches_clean_inval_pou_macro 2f
+ mov x0, xzr
1:
uaccess_ttbr0_disable x1, x2
ret
-9:
+2:
mov x0, #-EFAULT
b 1b
-SYM_FUNC_END(__flush_icache_range)
-SYM_FUNC_END(__flush_cache_user_range)
+SYM_FUNC_END(caches_clean_inval_user_pou)
/*
- * invalidate_icache_range(start,end)
+ * icache_inval_pou(start,end)
*
* Ensure that the I cache is invalid within specified region.
*
* - start - virtual start address of region
* - end - virtual end address of region
*/
-SYM_FUNC_START(invalidate_icache_range)
+SYM_FUNC_START(icache_inval_pou)
alternative_if ARM64_HAS_CACHE_DIC
- mov x0, xzr
isb
ret
alternative_else_nop_endif
- uaccess_ttbr0_enable x2, x3, x4
-
- invalidate_icache_by_line x0, x1, x2, x3, 2f
- mov x0, xzr
-1:
- uaccess_ttbr0_disable x1, x2
+ invalidate_icache_by_line x0, x1, x2, x3
ret
-2:
- mov x0, #-EFAULT
- b 1b
-SYM_FUNC_END(invalidate_icache_range)
+SYM_FUNC_END(icache_inval_pou)
/*
- * __flush_dcache_area(kaddr, size)
+ * dcache_clean_inval_poc(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are cleaned and invalidated to the PoC.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - virtual start address of region
+ * - end - virtual end address of region
*/
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
/*
- * __clean_dcache_area_pou(kaddr, size)
+ * dcache_clean_pou(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are cleaned to the PoU.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - virtual start address of region
+ * - end - virtual end address of region
*/
-SYM_FUNC_START(__clean_dcache_area_pou)
+SYM_FUNC_START(dcache_clean_pou)
alternative_if ARM64_HAS_CACHE_IDC
dsb ishst
ret
alternative_else_nop_endif
dcache_by_line_op cvau, ish, x0, x1, x2, x3
ret
-SYM_FUNC_END(__clean_dcache_area_pou)
+SYM_FUNC_END(dcache_clean_pou)
/*
- * __inval_dcache_area(kaddr, size)
+ * dcache_inval_poc(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are invalidated. Any partial lines at the ends of the interval are
* also cleaned to PoC to prevent data loss.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - kernel start address of region
+ * - end - kernel end address of region
*/
SYM_FUNC_START_LOCAL(__dma_inv_area)
-SYM_FUNC_START_PI(__inval_dcache_area)
+SYM_FUNC_START_PI(dcache_inval_poc)
/* FALLTHROUGH */
/*
- * __dma_inv_area(start, size)
+ * __dma_inv_area(start, end)
* - start - virtual start address of region
- * - size - size in question
+ * - end - virtual end address of region
*/
- add x1, x1, x0
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
@@ -165,48 +166,48 @@ SYM_FUNC_START_PI(__inval_dcache_area)
b.lo 2b
dsb sy
ret
-SYM_FUNC_END_PI(__inval_dcache_area)
+SYM_FUNC_END_PI(dcache_inval_poc)
SYM_FUNC_END(__dma_inv_area)
/*
- * __clean_dcache_area_poc(kaddr, size)
+ * dcache_clean_poc(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are cleaned to the PoC.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - virtual start address of region
+ * - end - virtual end address of region
*/
SYM_FUNC_START_LOCAL(__dma_clean_area)
-SYM_FUNC_START_PI(__clean_dcache_area_poc)
+SYM_FUNC_START_PI(dcache_clean_poc)
/* FALLTHROUGH */
/*
- * __dma_clean_area(start, size)
+ * __dma_clean_area(start, end)
* - start - virtual start address of region
- * - size - size in question
+ * - end - virtual end address of region
*/
dcache_by_line_op cvac, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__clean_dcache_area_poc)
+SYM_FUNC_END_PI(dcache_clean_poc)
SYM_FUNC_END(__dma_clean_area)
/*
- * __clean_dcache_area_pop(kaddr, size)
+ * dcache_clean_pop(start, end)
*
- * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * Ensure that any D-cache lines for the interval [start, end)
* are cleaned to the PoP.
*
- * - kaddr - kernel address
- * - size - size in question
+ * - start - virtual start address of region
+ * - end - virtual end address of region
*/
-SYM_FUNC_START_PI(__clean_dcache_area_pop)
+SYM_FUNC_START_PI(dcache_clean_pop)
alternative_if_not ARM64_HAS_DCPOP
- b __clean_dcache_area_poc
+ b dcache_clean_poc
alternative_else_nop_endif
dcache_by_line_op cvap, sy, x0, x1, x2, x3
ret
-SYM_FUNC_END_PI(__clean_dcache_area_pop)
+SYM_FUNC_END_PI(dcache_clean_pop)
/*
* __dma_flush_area(start, size)
@@ -217,6 +218,7 @@ SYM_FUNC_END_PI(__clean_dcache_area_pop)
* - size - size in question
*/
SYM_FUNC_START_PI(__dma_flush_area)
+ add x1, x0, x1
dcache_by_line_op civac, sy, x0, x1, x2, x3
ret
SYM_FUNC_END_PI(__dma_flush_area)
@@ -228,6 +230,7 @@ SYM_FUNC_END_PI(__dma_flush_area)
* - dir - DMA direction
*/
SYM_FUNC_START_PI(__dma_map_area)
+ add x1, x0, x1
cmp w2, #DMA_FROM_DEVICE
b.eq __dma_inv_area
b __dma_clean_area
@@ -240,6 +243,7 @@ SYM_FUNC_END_PI(__dma_map_area)
* - dir - DMA direction
*/
SYM_FUNC_START_PI(__dma_unmap_area)
+ add x1, x0, x1
cmp w2, #DMA_TO_DEVICE
b.ne __dma_inv_area
ret
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 001737a8f309..cd72576ae2b7 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -402,14 +402,12 @@ static int asids_init(void)
{
asid_bits = get_cpu_asid_bits();
atomic64_set(&asid_generation, ASID_FIRST_VERSION);
- asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*asid_map),
- GFP_KERNEL);
+ asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
if (!asid_map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);
- pinned_asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS),
- sizeof(*pinned_asid_map), GFP_KERNEL);
+ pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
nr_pinned_asids = 0;
/*
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 871c82ab0a30..349c488765ca 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -99,6 +99,8 @@ static void mem_abort_decode(unsigned int esr)
pr_alert(" EA = %lu, S1PTW = %lu\n",
(esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
(esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
+ pr_alert(" FSC = 0x%02x: %s\n", (esr & ESR_ELx_FSC),
+ esr_to_fault_info(esr)->name);
if (esr_is_data_abort(esr))
data_abort_decode(esr);
@@ -232,13 +234,17 @@ static bool is_el1_instruction_abort(unsigned int esr)
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}
+static bool is_el1_data_abort(unsigned int esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR;
+}
+
static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- unsigned int ec = ESR_ELx_EC(esr);
unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
- if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+ if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
return false;
if (fsc_type == ESR_ELx_FSC_PERM)
@@ -258,7 +264,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
unsigned long flags;
u64 par, dfsc;
- if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR ||
+ if (!is_el1_data_abort(esr) ||
(esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
return false;
@@ -346,10 +352,9 @@ static void do_tag_recovery(unsigned long addr, unsigned int esr,
static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
{
- unsigned int ec = ESR_ELx_EC(esr);
unsigned int fsc = esr & ESR_ELx_FSC;
- if (ec != ESR_ELx_EC_DABT_CUR)
+ if (!is_el1_data_abort(esr))
return false;
if (fsc == ESR_ELx_FSC_MTE)
@@ -504,7 +509,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
*/
if (!(vma->vm_flags & vm_flags))
return VM_FAULT_BADACCESS;
- return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs);
+ return handle_mm_fault(vma, addr, mm_flags, regs);
}
static bool is_el0_instruction_abort(unsigned int esr)
@@ -836,13 +841,6 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(do_mem_abort);
-void do_el0_irq_bp_hardening(void)
-{
- /* PC has already been checked in entry.S */
- arm64_apply_bp_hardening();
-}
-NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
-
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
@@ -921,3 +919,29 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
debug_exception_exit(regs);
}
NOKPROBE_SYMBOL(do_debug_exception);
+
+/*
+ * Used during anonymous page fault handling.
+ */
+struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+ unsigned long vaddr)
+{
+ gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO;
+
+ /*
+ * If the page is mapped with PROT_MTE, initialise the tags at the
+ * point of allocation and page zeroing as this is usually faster than
+ * separate DC ZVA and STGM.
+ */
+ if (vma->vm_flags & VM_MTE)
+ flags |= __GFP_ZEROTAGS;
+
+ return alloc_page_vma(flags, vma, vaddr);
+}
+
+void tag_clear_highpage(struct page *page)
+{
+ mte_zero_clear_page_tags(page_address(page));
+ page_kasan_tag_reset(page);
+ set_bit(PG_mte_tagged, &page->flags);
+}
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 6d44c028d1c9..2aaf950b906c 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -14,28 +14,25 @@
#include <asm/cache.h>
#include <asm/tlbflush.h>
-void sync_icache_aliases(void *kaddr, unsigned long len)
+void sync_icache_aliases(unsigned long start, unsigned long end)
{
- unsigned long addr = (unsigned long)kaddr;
-
if (icache_is_aliasing()) {
- __clean_dcache_area_pou(kaddr, len);
- __flush_icache_all();
+ dcache_clean_pou(start, end);
+ icache_inval_all_pou();
} else {
/*
* Don't issue kick_all_cpus_sync() after I-cache invalidation
* for user mappings.
*/
- __flush_icache_range(addr, addr + len);
+ caches_clean_inval_pou(start, end);
}
}
-static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
- unsigned long uaddr, void *kaddr,
- unsigned long len)
+static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
{
if (vma->vm_flags & VM_EXEC)
- sync_icache_aliases(kaddr, len);
+ sync_icache_aliases(start, end);
}
/*
@@ -48,7 +45,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long len)
{
memcpy(dst, src, len);
- flush_ptrace_access(vma, page, uaddr, dst, len);
+ flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len);
}
void __sync_icache_dcache(pte_t pte)
@@ -56,7 +53,9 @@ void __sync_icache_dcache(pte_t pte)
struct page *page = pte_page(pte);
if (!test_bit(PG_dcache_clean, &page->flags)) {
- sync_icache_aliases(page_address(page), page_size(page));
+ sync_icache_aliases((unsigned long)page_address(page),
+ (unsigned long)page_address(page) +
+ page_size(page));
set_bit(PG_dcache_clean, &page->flags);
}
}
@@ -77,20 +76,20 @@ EXPORT_SYMBOL(flush_dcache_page);
/*
* Additional functions defined in assembly.
*/
-EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(caches_clean_inval_pou);
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size)
{
/* Ensure order against any prior non-cacheable writes */
dmb(osh);
- __clean_dcache_area_pop(addr, size);
+ dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
- __inval_dcache_area(addr, size);
+ dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
#endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 49019ea0c8a8..8490ed2917ff 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -474,6 +474,13 @@ void __init mem_init(void)
BUILD_BUG_ON(TASK_SIZE_32 > DEFAULT_MAP_WINDOW_64);
#endif
+ /*
+ * Selected page table levels should match when derived from
+ * scratch using the virtual address range and page size.
+ */
+ BUILD_BUG_ON(ARM64_HW_PGTABLE_LEVELS(CONFIG_ARM64_VA_BITS) !=
+ CONFIG_PGTABLE_LEVELS);
+
if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
extern int sysctl_overcommit_memory;
/*
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 7553a7eab3db..595fde9a47dd 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -228,7 +228,7 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
next = pmd_addr_end(addr, end);
/* try section mapping first */
- if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
+ if (((addr | next | phys) & ~PMD_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
pmd_set_huge(pmdp, phys, prot);
@@ -1114,14 +1114,14 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
}
#endif
-#if !ARM64_SWAPPER_USES_SECTION_MAPS
+#if !ARM64_KERNEL_USES_PMD_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
return vmemmap_populate_basepages(start, end, node, altmap);
}
-#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
+#else /* !ARM64_KERNEL_USES_PMD_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
@@ -1166,17 +1166,18 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return 0;
}
-#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */
+#endif /* !ARM64_KERNEL_USES_PMD_MAPS */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{
-#ifdef CONFIG_MEMORY_HOTPLUG
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
unmap_hotplug_range(start, end, true, altmap);
free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END);
-#endif
}
+#endif /* CONFIG_MEMORY_HOTPLUG */
static inline pud_t *fixmap_pud(unsigned long addr)
{
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 97d7bcd8d4f2..35936c5ae1ce 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -46,9 +46,13 @@
#endif
#ifdef CONFIG_KASAN_HW_TAGS
-#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
#else
-#define TCR_KASAN_HW_FLAGS 0
+/*
+ * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
+ * TBI being enabled at EL1.
+ */
+#define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
#endif
/*
@@ -58,10 +62,8 @@
#define MAIR_EL1_SET \
(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \
MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \
- MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
- MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL_TAGGED))
#ifdef CONFIG_CPU_PM
@@ -83,11 +85,7 @@ SYM_FUNC_START(cpu_do_suspend)
mrs x9, mdscr_el1
mrs x10, oslsr_el1
mrs x11, sctlr_el1
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- mrs x12, tpidr_el1
-alternative_else
- mrs x12, tpidr_el2
-alternative_endif
+ get_this_cpu_offset x12
mrs x13, sp_el0
stp x2, x3, [x0]
stp x4, x5, [x0, #16]
@@ -145,11 +143,7 @@ SYM_FUNC_START(cpu_do_resume)
msr mdscr_el1, x10
msr sctlr_el1, x12
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- msr tpidr_el1, x13
-alternative_else
- msr tpidr_el2, x13
-alternative_endif
+ set_this_cpu_offset x13
msr sp_el0, x14
/*
* Restore oslsr_el1 by writing oslar_el1
@@ -464,7 +458,7 @@ SYM_FUNC_START(__cpu_setup)
msr_s SYS_TFSRE0_EL1, xzr
/* set the TCR_EL1 bits */
- mov_q x10, TCR_KASAN_HW_FLAGS
+ mov_q x10, TCR_MTE_FLAGS
orr tcr, tcr, x10
1:
#endif
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index a1937dfff31c..1c403536c9bb 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -159,10 +159,6 @@ static const struct prot_bits pte_bits[] = {
.set = "DEVICE/nGnRE",
}, {
.mask = PTE_ATTRINDX_MASK,
- .val = PTE_ATTRINDX(MT_DEVICE_GRE),
- .set = "DEVICE/GRE",
- }, {
- .mask = PTE_ATTRINDX_MASK,
.val = PTE_ATTRINDX(MT_NORMAL_NC),
.set = "MEM/NORMAL-NC",
}, {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f7b194878a99..dccf98a37283 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -16,6 +16,7 @@
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
+#include <asm/insn.h>
#include <asm/set_memory.h>
#include "bpf_jit.h"
@@ -178,9 +179,6 @@ static bool is_addsub_imm(u32 imm)
return !(imm & ~0xfff) || !(imm & ~0xfff000);
}
-/* Stack must be multiples of 16B */
-#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
-
/* Tail call offset to jump into */
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
#define PROLOGUE_OFFSET 8
@@ -255,7 +253,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit(A64_BTI_J, ctx);
}
- ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
+ /* Stack must be multiples of 16B */
+ ctx->stack_size = round_up(prog->aux->stack_depth, 16);
/* Set up function call stack */
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
@@ -487,17 +486,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X:
+ emit(A64_UDIV(is64, dst, dst, src), ctx);
+ break;
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
- switch (BPF_OP(code)) {
- case BPF_DIV:
- emit(A64_UDIV(is64, dst, dst, src), ctx);
- break;
- case BPF_MOD:
- emit(A64_UDIV(is64, tmp, dst, src), ctx);
- emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
- break;
- }
+ emit(A64_UDIV(is64, tmp, dst, src), ctx);
+ emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
break;
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU64 | BPF_LSH | BPF_X:
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 21fbdda7086e..49305c2e6dfd 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -3,7 +3,8 @@
# Internal CPU capabilities constants, keep this list sorted
BTI
-HAS_32BIT_EL0
+# Unreliable: use system_supports_32bit_el0() instead.
+HAS_32BIT_EL0_DO_NOT_USE
HAS_32BIT_EL1
HAS_ADDRESS_AUTH
HAS_ADDRESS_AUTH_ARCH