diff options
Diffstat (limited to 'arch')
90 files changed, 699 insertions, 276 deletions
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 17fd1ed700cc..9152782444b5 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -67,7 +67,22 @@ sr r5, [ARC_REG_LPB_CTRL] 1: #endif /* CONFIG_ARC_LPB_DISABLE */ -#endif + + /* On HSDK, CCMs need to remapped super early */ +#ifdef CONFIG_ARC_SOC_HSDK + mov r6, 0x60000000 + lr r5, [ARC_REG_ICCM_BUILD] + breq r5, 0, 1f + sr r6, [ARC_REG_AUX_ICCM] +1: + lr r5, [ARC_REG_DCCM_BUILD] + breq r5, 0, 2f + sr r6, [ARC_REG_AUX_DCCM] +2: +#endif /* CONFIG_ARC_SOC_HSDK */ + +#endif /* CONFIG_ISA_ARCV2 */ + ; Config DSP_CTRL properly, so kernel may use integer multiply, ; multiply-accumulate, and divide operations DSP_EARLY_INIT diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index feba91c9d969..b23986f98450 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -112,7 +112,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, int (*consumer_fn) (unsigned int, void *), void *arg) { #ifdef CONFIG_ARC_DW2_UNWIND - int ret = 0; + int ret = 0, cnt = 0; unsigned int address; struct unwind_frame_info frame_info; @@ -132,6 +132,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, break; frame_info.regs.r63 = frame_info.regs.r31; + + if (cnt++ > 128) { + printk("unwinder looping too long, aborting !\n"); + return 0; + } } return address; /* return the last address it saw */ diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 0b63fc095b99..b3ea1fa11f87 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -17,22 +17,6 @@ int arc_hsdk_axi_dmac_coherent __section(".data") = 0; #define ARC_CCM_UNUSED_ADDR 0x60000000 -static void __init hsdk_init_per_cpu(unsigned int cpu) -{ - /* - * By default ICCM is mapped to 0x7z while this area is used for - * kernel virtual mappings, so move it to currently unused area. - */ - if (cpuinfo_arc700[cpu].iccm.sz) - write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR); - - /* - * By default DCCM is mapped to 0x8z while this area is used by kernel, - * so move it to currently unused area. - */ - if (cpuinfo_arc700[cpu].dccm.sz) - write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR); -} #define ARC_PERIPHERAL_BASE 0xf0000000 #define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) @@ -339,5 +323,4 @@ static const char *hsdk_compat[] __initconst = { MACHINE_START(SIMULATION, "hsdk") .dt_compat = hsdk_compat, .init_early = hsdk_init_early, - .init_per_cpu = hsdk_init_per_cpu, MACHINE_END diff --git a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts index f1a41152e9dd..adde62d6fce7 100644 --- a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts +++ b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts @@ -227,12 +227,12 @@ /delete-property/ #size-cells; spi-slave; status = "okay"; - ready-gpio = <&gpio 125 GPIO_ACTIVE_HIGH>; + ready-gpios = <&gpio 125 GPIO_ACTIVE_HIGH>; slave { compatible = "olpc,xo1.75-ec"; spi-cpha; - cmd-gpio = <&gpio 155 GPIO_ACTIVE_HIGH>; + cmd-gpios = <&gpio 155 GPIO_ACTIVE_HIGH>; }; }; diff --git a/arch/arm/boot/dts/mmp3.dtsi b/arch/arm/boot/dts/mmp3.dtsi index cc4efd0efabd..4ae630d37d09 100644 --- a/arch/arm/boot/dts/mmp3.dtsi +++ b/arch/arm/boot/dts/mmp3.dtsi @@ -296,6 +296,7 @@ interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; clocks = <&soc_clocks MMP2_CLK_CCIC0>; clock-names = "axi"; + power-domains = <&soc_clocks MMP3_POWER_DOMAIN_CAMERA>; #clock-cells = <0>; clock-output-names = "mclk"; status = "disabled"; @@ -307,6 +308,7 @@ interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; clocks = <&soc_clocks MMP2_CLK_CCIC1>; clock-names = "axi"; + power-domains = <&soc_clocks MMP3_POWER_DOMAIN_CAMERA>; #clock-cells = <0>; clock-output-names = "mclk"; status = "disabled"; diff --git a/arch/arm/boot/dts/stm32mp157c-ed1.dts b/arch/arm/boot/dts/stm32mp157c-ed1.dts index ca109dc18238..2e77ccec3fc1 100644 --- a/arch/arm/boot/dts/stm32mp157c-ed1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ed1.dts @@ -89,6 +89,14 @@ states = <1800000 0x1>, <2900000 0x0>; }; + + vin: vin { + compatible = "regulator-fixed"; + regulator-name = "vin"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + }; }; &adc { @@ -150,11 +158,18 @@ regulators { compatible = "st,stpmic1-regulators"; + buck1-supply = <&vin>; + buck2-supply = <&vin>; + buck3-supply = <&vin>; + buck4-supply = <&vin>; ldo1-supply = <&v3v3>; ldo2-supply = <&v3v3>; ldo3-supply = <&vdd_ddr>; + ldo4-supply = <&vin>; ldo5-supply = <&v3v3>; ldo6-supply = <&v3v3>; + vref_ddr-supply = <&vin>; + boost-supply = <&vin>; pwr_sw1-supply = <&bst_out>; pwr_sw2-supply = <&bst_out>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi index a5307745719a..93398cfae97e 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi @@ -80,6 +80,14 @@ dais = <&sai2a_port &sai2b_port &i2s2_port>; status = "okay"; }; + + vin: vin { + compatible = "regulator-fixed"; + regulator-name = "vin"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + }; }; &adc { @@ -240,9 +248,18 @@ regulators { compatible = "st,stpmic1-regulators"; + buck1-supply = <&vin>; + buck2-supply = <&vin>; + buck3-supply = <&vin>; + buck4-supply = <&vin>; ldo1-supply = <&v3v3>; + ldo2-supply = <&vin>; ldo3-supply = <&vdd_ddr>; + ldo4-supply = <&vin>; + ldo5-supply = <&vin>; ldo6-supply = <&v3v3>; + vref_ddr-supply = <&vin>; + boost-supply = <&vin>; pwr_sw1-supply = <&bst_out>; pwr_sw2-supply = <&bst_out>; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 0f95a6ef8543..1c5a666c54b5 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -143,7 +143,7 @@ trips { cpu_alert0: cpu-alert0 { /* milliCelsius */ - temperature = <850000>; + temperature = <85000>; hysteresis = <2000>; type = "passive"; }; diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index aeb1209e0804..bb70acc6b526 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -93,6 +93,7 @@ CONFIG_SPI=y CONFIG_SPI_IMX=y CONFIG_SPI_SPIDEV=y CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_MXC=y CONFIG_W1=y CONFIG_W1_MASTER_MXC=y CONFIG_W1_SLAVE_THERM=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 0fa79bd00219..221f5c340c86 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -217,6 +217,7 @@ CONFIG_GPIO_PCA953X=y CONFIG_GPIO_PCF857X=y CONFIG_GPIO_STMPE=y CONFIG_GPIO_74X164=y +CONFIG_GPIO_MXC=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_SYSCON=y CONFIG_POWER_RESET_SYSCON_POWEROFF=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index 70b709a669d2..e00be9faa23b 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -166,6 +166,7 @@ CONFIG_SPI_IMX=y CONFIG_SPI_ORION=y CONFIG_GPIO_ASPEED=m CONFIG_GPIO_ASPEED_SGPIO=y +CONFIG_GPIO_MXC=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_QNAP=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index e731cdf7c88c..a611b0c1e540 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -465,6 +465,7 @@ CONFIG_GPIO_PALMAS=y CONFIG_GPIO_TPS6586X=y CONFIG_GPIO_TPS65910=y CONFIG_GPIO_TWL4030=y +CONFIG_GPIO_MXC=y CONFIG_POWER_AVS=y CONFIG_ROCKCHIP_IODOMAIN=y CONFIG_POWER_RESET_AS3722=y diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 2d962fe48821..a3a64bf97250 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -35,13 +35,8 @@ ENTRY(ll_get_coherency_base) /* * MMU is disabled, use the physical address of the coherency - * base address. However, if the coherency fabric isn't mapped - * (i.e its virtual address is zero), it means coherency is - * not enabled, so we return 0. + * base address, (or 0x0 if the coherency fabric is not mapped) */ - ldr r1, =coherency_base - cmp r1, #0 - beq 2f adr r1, 3f ldr r3, [r1] ldr r1, [r1, r3] diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d57112a276f5..c23dbf8bebee 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -354,8 +354,8 @@ static void __init free_highpages(void) /* set highmem page free */ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &range_start, &range_end, NULL) { - unsigned long start = PHYS_PFN(range_start); - unsigned long end = PHYS_PFN(range_end); + unsigned long start = PFN_UP(range_start); + unsigned long end = PFN_DOWN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f858c352f72a..1515f6f153a0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -636,6 +636,26 @@ config ARM64_ERRATUM_1542419 If unsure, say Y. +config ARM64_ERRATUM_1508412 + bool "Cortex-A77: 1508412: workaround deadlock on sequence of NC/Device load and store exclusive or PAR read" + default y + help + This option adds a workaround for Arm Cortex-A77 erratum 1508412. + + Affected Cortex-A77 cores (r0p0, r1p0) could deadlock on a sequence + of a store-exclusive or read of PAR_EL1 and a load with device or + non-cacheable memory attributes. The workaround depends on a firmware + counterpart. + + KVM guests must also have the workaround implemented or they can + deadlock the system. + + Work around the issue by inserting DMB SY barriers around PAR_EL1 + register reads and warning KVM users. The DMB barrier is sufficient + to prevent a speculative PAR_EL1 read. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -982,7 +1002,7 @@ config NUMA config NODES_SHIFT int "Maximum NUMA Nodes (as a power of 2)" range 1 10 - default "2" + default "4" depends on NEED_MULTIPLE_NODES help Specify the maximum number of NUMA Nodes available on the target diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 6f2494dd6d60..5c4ac1c9f4e0 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -54,6 +54,7 @@ config ARCH_BCM_IPROC config ARCH_BERLIN bool "Marvell Berlin SoC Family" select DW_APB_ICTL + select DW_APB_TIMER_OF select GPIOLIB select PINCTRL help diff --git a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts index cb1360ae1211..7740f97c240f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts +++ b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts @@ -584,3 +584,9 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb { + status = "okay"; + dr_mode = "otg"; + vbus-supply = <&usb_pwr>; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index b9efc8469265..724ee179b316 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -171,6 +171,46 @@ #size-cells = <2>; ranges; + usb: usb@ffe09080 { + compatible = "amlogic,meson-axg-usb-ctrl"; + reg = <0x0 0xffe09080 0x0 0x20>; + interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clocks = <&clkc CLKID_USB>, <&clkc CLKID_USB1_DDR_BRIDGE>; + clock-names = "usb_ctrl", "ddr"; + resets = <&reset RESET_USB_OTG>; + + dr_mode = "otg"; + + phys = <&usb2_phy1>; + phy-names = "usb2-phy1"; + + dwc2: usb@ff400000 { + compatible = "amlogic,meson-g12a-usb", "snps,dwc2"; + reg = <0x0 0xff400000 0x0 0x40000>; + interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clkc CLKID_USB1>; + clock-names = "otg"; + phys = <&usb2_phy1>; + dr_mode = "peripheral"; + g-rx-fifo-size = <192>; + g-np-tx-fifo-size = <128>; + g-tx-fifo-size = <128 128 16 16 16>; + }; + + dwc3: usb@ff500000 { + compatible = "snps,dwc3"; + reg = <0x0 0xff500000 0x0 0x100000>; + interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; + dr_mode = "host"; + maximum-speed = "high-speed"; + snps,dis_u2_susphy_quirk; + }; + }; + ethmac: ethernet@ff3f0000 { compatible = "amlogic,meson-axg-dwmac", "snps,dwmac-3.70a", @@ -187,6 +227,8 @@ "timing-adjustment"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; + resets = <&reset RESET_ETHERNET>; + reset-names = "stmmaceth"; status = "disabled"; }; @@ -1734,6 +1776,16 @@ clock-names = "core", "clkin0", "clkin1"; resets = <&reset RESET_SD_EMMC_C>; }; + + usb2_phy1: phy@9020 { + compatible = "amlogic,meson-gxl-usb2-phy"; + #phy-cells = <0>; + reg = <0x0 0x9020 0x0 0x20>; + clocks = <&clkc CLKID_USB>; + clock-names = "phy"; + resets = <&reset RESET_USB_OTG>; + reset-names = "phy"; + }; }; sram: sram@fffc0000 { diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 1e83ec5b8c91..8514fe6a275a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -209,7 +209,7 @@ }; ethmac: ethernet@ff3f0000 { - compatible = "amlogic,meson-axg-dwmac", + compatible = "amlogic,meson-g12a-dwmac", "snps,dwmac-3.70a", "snps,dwmac"; reg = <0x0 0xff3f0000 0x0 0x10000>, @@ -224,6 +224,8 @@ "timing-adjustment"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; + resets = <&reset RESET_ETHERNET>; + reset-names = "stmmaceth"; status = "disabled"; mdio0: mdio { @@ -282,6 +284,8 @@ hwrng: rng@218 { compatible = "amlogic,meson-rng"; reg = <0x0 0x218 0x0 0x4>; + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts index 5de2815ba99d..ce1198ad34e4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts @@ -19,7 +19,7 @@ regulator-min-microvolt = <680000>; regulator-max-microvolt = <1040000>; - pwms = <&pwm_AO_cd 1 1500 0>; + pwms = <&pwm_ab 0 1500 0>; }; &vddcpu_b { diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 0edd137151f8..726b91d3a905 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -13,6 +13,7 @@ #include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/power/meson-gxbb-power.h> +#include <dt-bindings/reset/amlogic,meson-gxbb-reset.h> #include <dt-bindings/thermal/thermal.h> / { @@ -575,6 +576,8 @@ interrupt-names = "macirq"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; + resets = <&reset RESET_ETHERNET>; + reset-names = "stmmaceth"; power-domains = <&pwrc PWRC_GXBB_ETHERNET_MEM_ID>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts index 03733fd92732..215d2f702623 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts @@ -20,17 +20,23 @@ compatible = "globalscale,espressobin-v7-emmc", "globalscale,espressobin-v7", "globalscale,espressobin", "marvell,armada3720", "marvell,armada3710"; + + aliases { + /* ethernet1 is wan port */ + ethernet1 = &switch0port3; + ethernet3 = &switch0port1; + }; }; &switch0 { ports { - port@1 { + switch0port1: port@1 { reg = <1>; label = "lan1"; phy-handle = <&switch0phy0>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "wan"; phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts index 8570c5f47d7d..b6f4af8ebafb 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts @@ -19,17 +19,23 @@ model = "Globalscale Marvell ESPRESSOBin Board V7"; compatible = "globalscale,espressobin-v7", "globalscale,espressobin", "marvell,armada3720", "marvell,armada3710"; + + aliases { + /* ethernet1 is wan port */ + ethernet1 = &switch0port3; + ethernet3 = &switch0port1; + }; }; &switch0 { ports { - port@1 { + switch0port1: port@1 { reg = <1>; label = "lan1"; phy-handle = <&switch0phy0>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "wan"; phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi index b97218c72727..0775c16e0ec8 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi @@ -13,6 +13,10 @@ / { aliases { ethernet0 = ð0; + /* for dsa slave device */ + ethernet1 = &switch0port1; + ethernet2 = &switch0port2; + ethernet3 = &switch0port3; serial0 = &uart0; serial1 = &uart1; }; @@ -120,7 +124,7 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { + switch0port0: port@0 { reg = <0>; label = "cpu"; ethernet = <ð0>; @@ -131,19 +135,19 @@ }; }; - port@1 { + switch0port1: port@1 { reg = <1>; label = "wan"; phy-handle = <&switch0phy0>; }; - port@2 { + switch0port2: port@2 { reg = <2>; label = "lan0"; phy-handle = <&switch0phy1>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "lan1"; phy-handle = <&switch0phy2>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 17a2df6a263e..5cfe3cf6f2ac 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -500,6 +500,7 @@ CONFIG_GPIO_ALTERA=m CONFIG_GPIO_DWAPB=y CONFIG_GPIO_MB86S7X=y CONFIG_GPIO_MPC8XXX=y +CONFIG_GPIO_MXC=y CONFIG_GPIO_PL061=y CONFIG_GPIO_RCAR=y CONFIG_GPIO_UNIPHIER=y diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index e3d47b52161d..ec7720dbe2c8 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -10,6 +10,7 @@ * #imm16 values used for BRK instruction generation * 0x004: for installing kprobes * 0x005: for installing uprobes + * 0x006: for kprobe software single-step * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -19,6 +20,7 @@ */ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 0ac3e06a2118..63d43b5f82f6 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -24,6 +24,7 @@ #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) #define ICACHE_POLICY_VPIPT 0 +#define ICACHE_POLICY_RESERVED 1 #define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_PIPT 3 diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 42868dbd29fd..e7d98997c09c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -65,7 +65,8 @@ #define ARM64_HAS_ARMv8_4_TTL 55 #define ARM64_HAS_TLB_RANGE 56 #define ARM64_MTE 57 +#define ARM64_WORKAROUND_1508412 58 -#define ARM64_NCAPS 58 +#define ARM64_NCAPS 59 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f7e7144af174..97244d4feca9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -375,6 +375,23 @@ cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, return false; } +static __always_inline bool is_vhe_hyp_code(void) +{ + /* Only defined for code run in VHE hyp context */ + return __is_defined(__KVM_VHE_HYPERVISOR__); +} + +static __always_inline bool is_nvhe_hyp_code(void) +{ + /* Only defined for code run in NVHE hyp context */ + return __is_defined(__KVM_NVHE_HYPERVISOR__); +} + +static __always_inline bool is_hyp_code(void) +{ + return is_vhe_hyp_code() || is_nvhe_hyp_code(); +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -428,35 +445,40 @@ static __always_inline bool __cpus_have_const_cap(int num) } /* - * Test for a capability, possibly with a runtime check. + * Test for a capability without a runtime check. * - * Before capabilities are finalized, this behaves as cpus_have_cap(). + * Before capabilities are finalized, this will BUG(). * After capabilities are finalized, this is patched to avoid a runtime check. * * @num must be a compile-time constant. */ -static __always_inline bool cpus_have_const_cap(int num) +static __always_inline bool cpus_have_final_cap(int num) { if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else - return cpus_have_cap(num); + BUG(); } /* - * Test for a capability without a runtime check. + * Test for a capability, possibly with a runtime check for non-hyp code. * - * Before capabilities are finalized, this will BUG(). + * For hyp code, this behaves the same as cpus_have_final_cap(). + * + * For non-hyp code: + * Before capabilities are finalized, this behaves as cpus_have_cap(). * After capabilities are finalized, this is patched to avoid a runtime check. * * @num must be a compile-time constant. */ -static __always_inline bool cpus_have_final_cap(int num) +static __always_inline bool cpus_have_const_cap(int num) { - if (system_capabilities_finalized()) + if (is_hyp_code()) + return cpus_have_final_cap(num); + else if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else - BUG(); + return cpus_have_cap(num); } static inline void cpus_set_cap(unsigned int num) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7219cddeba66..9e2e9a63c7b6 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -71,6 +71,7 @@ #define ARM_CPU_PART_CORTEX_A55 0xD05 #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C +#define ARM_CPU_PART_CORTEX_A77 0xD0D #define APM_CPU_PART_POTENZA 0x000 @@ -105,6 +106,7 @@ #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 0b298f48f5bf..657c921fd784 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -53,6 +53,7 @@ /* kprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) +#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5)) /* uprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 97e511d645a2..8699ce30f587 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -16,7 +16,7 @@ #include <linux/percpu.h> #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0aecbab6a7fb..781d029b8aa8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -239,6 +239,7 @@ enum vcpu_sysreg { #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) +#define cp14_DBGVCR (DBGVCR32_EL2 * 2) #define NR_COPRO_REGS (NR_SYS_REGS * 2) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d52c1b3ce589..174817ba119c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1007,6 +1007,7 @@ #include <linux/build_bug.h> #include <linux/types.h> +#include <asm/alternative.h> #define __DEFINE_MRS_MSR_S_REGNUM \ " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ @@ -1095,6 +1096,14 @@ write_sysreg_s(__scs_new, sysreg); \ } while (0) +#define read_sysreg_par() ({ \ + u64 par; \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par = read_sysreg(par_el1); \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par; \ +}) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 09977acc007d..6069be50baf9 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -86,13 +86,12 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { /* - * The following macros are defined for code specic to VHE/nVHE. - * If has_vhe() is inlined into those compilation units, it can - * be determined statically. Otherwise fall back to caps. + * Code only run in VHE/NVHE hyp context can assume VHE is present or + * absent. Otherwise fall back to caps. */ - if (__is_defined(__KVM_VHE_HYPERVISOR__)) + if (is_vhe_hyp_code()) return true; - else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) + else if (is_nvhe_hyp_code()) return false; else return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 24d75af344b1..61314fd70f13 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -523,6 +523,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_enable_trap_ctr_access, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1508412 + { + /* we depend on the firmware portion for correctness */ + .desc = "ARM erratum 1508412 (kernel portion)", + .capability = ARM64_WORKAROUND_1508412, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A77, + 0, 0, + 1, 0), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 6a7bb3729d60..77605aec25fe 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -34,10 +34,10 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; static const char *icache_policy_str[] = { - [0 ... ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_VPIPT] = "VPIPT", + [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", [ICACHE_POLICY_VIPT] = "VIPT", [ICACHE_POLICY_PIPT] = "PIPT", - [ICACHE_POLICY_VPIPT] = "VPIPT", }; unsigned long __icache_flags; @@ -334,10 +334,11 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) case ICACHE_POLICY_VPIPT: set_bit(ICACHEF_VPIPT, &__icache_flags); break; - default: + case ICACHE_POLICY_RESERVED: case ICACHE_POLICY_VIPT: /* Assume aliasing */ set_bit(ICACHEF_ALIASING, &__icache_flags); + break; } pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S index df67c0f2a077..a71844fb923e 100644 --- a/arch/arm64/kernel/efi-header.S +++ b/arch/arm64/kernel/efi-header.S @@ -147,6 +147,6 @@ efi_debug_entry: * correctly at this alignment, we must ensure that .text is * placed at a 4k boundary in the Image to begin with. */ - .align 12 + .balign SEGMENT_ALIGN efi_header_end: .endm diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f30007dff35f..b295fb912b12 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -365,6 +365,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 br x30 #endif .else + /* Ensure any device/NC reads complete */ + alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 + eret .endif sb diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 61684a500914..c615b285ff5b 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -87,7 +87,6 @@ KVM_NVHE_ALIAS(__icache_flags); /* Kernel symbols needed for cpus_have_final/const_caps checks. */ KVM_NVHE_ALIAS(arm64_const_caps_ready); KVM_NVHE_ALIAS(cpu_hwcap_keys); -KVM_NVHE_ALIAS(cpu_hwcaps); /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index af9987c154ca..66adee8b5fc8 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -43,7 +43,7 @@ static void *image_load(struct kimage *image, u64 flags, value; bool be_image, be_kernel; struct kexec_buf kbuf; - unsigned long text_offset; + unsigned long text_offset, kernel_segment_number; struct kexec_segment *kernel_segment; int ret; @@ -88,11 +88,37 @@ static void *image_load(struct kimage *image, /* Adjust kernel segment with TEXT_OFFSET */ kbuf.memsz += text_offset; - ret = kexec_add_buffer(&kbuf); - if (ret) + kernel_segment_number = image->nr_segments; + + /* + * The location of the kernel segment may make it impossible to satisfy + * the other segment requirements, so we try repeatedly to find a + * location that will work. + */ + while ((ret = kexec_add_buffer(&kbuf)) == 0) { + /* Try to load additional data */ + kernel_segment = &image->segment[kernel_segment_number]; + ret = load_other_segments(image, kernel_segment->mem, + kernel_segment->memsz, initrd, + initrd_len, cmdline); + if (!ret) + break; + + /* + * We couldn't find space for the other segments; erase the + * kernel segment and try the next available hole. + */ + image->nr_segments -= 1; + kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + } + + if (ret) { + pr_err("Could not find any suitable kernel location!"); return ERR_PTR(ret); + } - kernel_segment = &image->segment[image->nr_segments - 1]; + kernel_segment = &image->segment[kernel_segment_number]; kernel_segment->mem += text_offset; kernel_segment->memsz -= text_offset; image->start = kernel_segment->mem; @@ -101,12 +127,7 @@ static void *image_load(struct kimage *image, kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz); - /* Load additional data */ - ret = load_other_segments(image, - kernel_segment->mem, kernel_segment->memsz, - initrd, initrd_len, cmdline); - - return ERR_PTR(ret); + return 0; } #ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 5b0e67b93cdc..03210f644790 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -240,6 +240,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) return ret; } +/* + * Tries to add the initrd and DTB to the image. If it is not possible to find + * valid locations, this function will undo changes to the image and return non + * zero. + */ int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, @@ -248,7 +253,8 @@ int load_other_segments(struct kimage *image, { struct kexec_buf kbuf; void *headers, *dtb = NULL; - unsigned long headers_sz, initrd_load_addr = 0, dtb_len; + unsigned long headers_sz, initrd_load_addr = 0, dtb_len, + orig_segments = image->nr_segments; int ret = 0; kbuf.image = image; @@ -334,6 +340,7 @@ int load_other_segments(struct kimage *image, return 0; out_err: + image->nr_segments = orig_segments; vfree(dtb); return ret; } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index deba738142ed..f11a1a1f7026 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -36,25 +36,16 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) -{ - void *addrs[1]; - u32 insns[1]; - - addrs[0] = addr; - insns[0] = opcode; - - return aarch64_insn_patch_text(addrs, insns, 1); -} - static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + kprobe_opcode_t *addr = p->ainsn.api.insn; + void *addrs[] = {addr, addr + 1}; + u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS}; + /* prepare insn slot */ - patch_text(p->ainsn.api.insn, p->opcode); + aarch64_insn_patch_text(addrs, insns, 2); - flush_icache_range((uintptr_t) (p->ainsn.api.insn), - (uintptr_t) (p->ainsn.api.insn) + - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE)); /* * Needs restoring of return address after stepping xol. @@ -128,13 +119,18 @@ void *alloc_insn_page(void) /* arm kprobe: install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - patch_text(p->addr, BRK64_OPCODE_KPROBES); + void *addr = p->addr; + u32 insn = BRK64_OPCODE_KPROBES; + + aarch64_insn_patch_text(&addr, &insn, 1); } /* disarm kprobe: remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, p->opcode); + void *addr = p->addr; + + aarch64_insn_patch_text(&addr, &p->opcode, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) @@ -163,20 +159,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p) } /* - * Interrupts need to be disabled before single-step mode is set, and not - * reenabled until after single-step mode ends. - * Without disabling interrupt on local CPU, there is a chance of - * interrupt occurrence in the period of exception return and start of - * out-of-line single-step, that result in wrongly single stepping - * into the interrupt handler. + * Mask all of DAIF while executing the instruction out-of-line, to keep things + * simple and avoid nesting exceptions. Interrupts do have to be disabled since + * the kprobe state is per-CPU and doesn't get migrated. */ static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { kcb->saved_irqflag = regs->pstate & DAIF_MASK; - regs->pstate |= PSR_I_BIT; - /* Unmask PSTATE.D for enabling software step exceptions. */ - regs->pstate &= ~PSR_D_BIT; + regs->pstate |= DAIF_MASK; } static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, @@ -219,10 +210,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, slot = (unsigned long)p->ainsn.api.insn; set_ss_context(kcb, slot); /* mark pending ss */ - - /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); - kernel_enable_single_step(regs); instruction_pointer_set(regs, slot); } else { /* insn simulation */ @@ -273,12 +261,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) } /* call post handler */ kcb->kprobe_status = KPROBE_HIT_SSDONE; - if (cur->post_handler) { - /* post_handler can hit breakpoint and single step - * again, so we enable D-flag for recursive exception. - */ + if (cur->post_handler) cur->post_handler(cur, regs, 0); - } reset_current_kprobe(); } @@ -302,8 +286,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) if (!instruction_pointer(regs)) BUG(); - kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -365,10 +347,6 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) * pre-handler and it returned non-zero, it will * modify the execution path and no need to single * stepping. Let's just reset current kprobe and exit. - * - * pre_handler can hit a breakpoint and can step thru - * before return, keep PSTATE D-flag enabled until - * pre_handler return back. */ if (!p->pre_handler || !p->pre_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); @@ -399,7 +377,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) } static int __kprobes -kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; @@ -409,16 +387,15 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) if (retval == DBG_HOOK_HANDLED) { kprobes_restore_local_irqflag(kcb, regs); - kernel_disable_single_step(); - post_kprobe_handler(kcb, regs); } return retval; } -static struct step_hook kprobes_step_hook = { - .fn = kprobe_single_step_handler, +static struct break_hook kprobes_break_ss_hook = { + .imm = KPROBES_BRK_SS_IMM, + .fn = kprobe_breakpoint_ss_handler, }; static int __kprobes @@ -486,7 +463,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { register_kernel_break_hook(&kprobes_break_hook); - register_kernel_step_hook(&kprobes_step_hook); + register_kernel_break_hook(&kprobes_break_ss_hook); return 0; } diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 25f3c80b5ffe..c18eb7d41274 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -135,8 +135,6 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) return SPECTRE_VULNERABLE; } -#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED (1) - static enum mitigation_state spectre_v2_get_cpu_fw_mitigation_state(void) { int ret; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 82e75fc2c903..09c96f57818c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -222,6 +222,7 @@ asmlinkage notrace void secondary_start_kernel(void) if (system_uses_irq_prio_masking()) init_gic_priority_masking(); + rcu_cpu_starting(cpu); preempt_disable(); trace_hardirqs_off(); diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 7f96a1a9f68c..79280c53b9a6 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -22,16 +22,21 @@ endif CC_COMPAT ?= $(CC) CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS) + +ifneq ($(LLVM),) +LD_COMPAT ?= $(LD) +else +LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld +endif else CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc +LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld endif cc32-option = $(call try-run,\ $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-disable-warning = $(call try-run,\ $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) -cc32-ldoption = $(call try-run,\ - $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-as-instr = $(call try-run,\ printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) @@ -122,14 +127,10 @@ dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1) VDSO_CFLAGS += $(dmbinstr) VDSO_AFLAGS += $(dmbinstr) -VDSO_LDFLAGS := $(VDSO_CPPFLAGS) # From arm vDSO Makefile -VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 -VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 -VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft -VDSO_LDFLAGS += -Wl,--hash-style=sysv -VDSO_LDFLAGS += -Wl,--build-id=sha1 -VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) +VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 +VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared --hash-style=sysv --build-id=sha1 # Borrow vdsomunge.c from the arm vDSO @@ -189,8 +190,8 @@ quiet_cmd_vdsold_and_vdso_check = LD32 $@ cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check) quiet_cmd_vdsold = LD32 $@ - cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ - -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ + cmd_vdsold = $(LD_COMPAT) $(VDSO_LDFLAGS) \ + -T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsocc = CC32 $@ cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< quiet_cmd_vdsocc_gettimeofday = CC32 $@ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6d78c041fdf6..1bda604f4c70 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -278,7 +278,7 @@ SECTIONS * explicitly check instead of blindly discarding. */ .plt : { - *(.plt) *(.plt.*) *(.iplt) *(.igot) + *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) } ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f56122eedffc..5750ec34960e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -808,6 +808,25 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) preempt_enable(); + /* + * The ARMv8 architecture doesn't give the hypervisor + * a mechanism to prevent a guest from dropping to AArch32 EL0 + * if implemented by the CPU. If we spot the guest in such + * state and that we decided it wasn't supposed to do so (like + * with the asymmetric AArch32 case), return to userspace with + * a fatal error. + */ + if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) { + /* + * As we have caught the guest red-handed, decide that + * it isn't fit for purpose anymore by making the vcpu + * invalid. The VMM can try and fix it by issuing a + * KVM_ARM_VCPU_INIT if it really wants to. + */ + vcpu->arch.target = -1; + ret = ARM_EXCEPTION_IL; + } + ret = handle_exit(vcpu, ret); } @@ -1719,7 +1738,8 @@ int kvm_arch_init(void *opaque) return -ENODEV; } - if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)) + if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || + cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ "Only trusted guests should be used on this system.\n"); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 313a8fa3c721..1f875a8f20c4 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -140,9 +140,9 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) * We do need to save/restore PAR_EL1 though, as we haven't * saved the guest context yet, and we may return early... */ - par = read_sysreg(par_el1); + par = read_sysreg_par(); if (!__kvm_at("s1e1r", far)) - tmp = read_sysreg(par_el1); + tmp = read_sysreg_par(); else tmp = SYS_PAR_EL1_F; /* back to the guest */ write_sysreg(par, par_el1); @@ -421,7 +421,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && handle_tx2_tvm(vcpu)) - return true; + goto guest; /* * We trap the first access to the FP/SIMD to save the host context @@ -431,13 +431,13 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) * Similarly for trapped SVE accesses. */ if (__hyp_handle_fpsimd(vcpu)) - return true; + goto guest; if (__hyp_handle_ptrauth(vcpu)) - return true; + goto guest; if (!__populate_fault_info(vcpu)) - return true; + goto guest; if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { bool valid; @@ -452,7 +452,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) int ret = __vgic_v2_perform_cpuif_access(vcpu); if (ret == 1) - return true; + goto guest; /* Promote an illegal access to an SError.*/ if (ret == -1) @@ -468,12 +468,17 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) int ret = __vgic_v3_perform_cpuif_access(vcpu); if (ret == 1) - return true; + goto guest; } exit: /* Return to the host kernel and handle the exit */ return false; + +guest: + /* Re-enter the guest */ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); + return true; } static inline void __kvm_unexpected_el2_exception(void) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 7a986030145f..cce43bfe158f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -43,7 +43,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); - ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ff9a0f547b9f..ed27f06a31ba 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -17,8 +17,6 @@ SYM_FUNC_START(__host_exit) get_host_ctxt x0, x1 - ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) - /* Store the host regs x2 and x3 */ stp x2, x3, [x0, #CPU_XREG_OFFSET(2)] diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 47224dc62c51..b11a9d7db677 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -57,16 +57,25 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc - /* Set tpidr_el2 for use by HYP to free a register */ - msr tpidr_el2, x2 - - mov x2, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) - cmp x0, x2 - b.eq 1f + // We only actively check bits [24:31], and everything + // else has to be zero, which we check at build time. +#if (KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) & 0xFFFFFFFF00FFFFFF) +#error Unexpected __KVM_HOST_SMCCC_FUNC___kvm_hyp_init value +#endif + + ror x0, x0, #24 + eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 24) & 0xF) + ror x0, x0, #4 + eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 28) & 0xF) + cbz x0, 1f mov x0, #SMCCC_RET_NOT_SUPPORTED eret -1: phys_to_ttbr x0, x1 +1: + /* Set tpidr_el2 for use by HYP to free a register */ + msr tpidr_el2, x2 + + phys_to_ttbr x0, x1 alternative_if ARM64_HAS_CNP orr x0, x0, #TTBR_CNP_BIT alternative_else_nop_endif diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index a457a0306e03..8ae8160bc93a 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -250,7 +250,7 @@ void __noreturn hyp_panic(void) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); + u64 par = read_sysreg_par(); bool restore_host = true; struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 39ca71ab8866..fbde89a2c6e8 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -128,7 +128,6 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - mmu = kern_hyp_va(mmu); __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0cdf6e461cbd..0271b4a3b9fe 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -635,7 +635,7 @@ static void stage2_flush_dcache(void *addr, u64 size) static bool stage2_pte_cacheable(kvm_pte_t pte) { - u64 memattr = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR, pte); + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; return memattr == PAGE_S2_MEMATTR(NORMAL); } @@ -846,7 +846,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm) u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; - pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO); + pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!pgt->pgd) return -ENOMEM; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index fe69de16dadc..62546e20b251 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -215,7 +215,7 @@ void __noreturn hyp_panic(void) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); + u64 par = read_sysreg_par(); __hyp_call_panic(spsr, elr, par); unreachable(); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 9824025ccc5c..25ea4ecb6449 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -31,7 +31,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) val = SMCCC_RET_SUCCESS; break; case SPECTRE_UNAFFECTED: - val = SMCCC_RET_NOT_REQUIRED; + val = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED; break; } break; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 19aacc7d64de..57972bdb213a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -787,14 +787,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = PAGE_SHIFT; } - if (vma_shift == PUD_SHIFT && - !fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) - vma_shift = PMD_SHIFT; - - if (vma_shift == PMD_SHIFT && - !fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { - force_pte = true; + switch (vma_shift) { + case PUD_SHIFT: + if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) + break; + fallthrough; + case CONT_PMD_SHIFT: + vma_shift = PMD_SHIFT; + fallthrough; + case PMD_SHIFT: + if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) + break; + fallthrough; + case CONT_PTE_SHIFT: vma_shift = PAGE_SHIFT; + force_pte = true; + fallthrough; + case PAGE_SHIFT: + break; + default: + WARN_ONCE(1, "Unknown vma_shift %d", vma_shift); } vma_pagesize = 1UL << vma_shift; @@ -839,6 +851,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (kvm_is_device_pfn(pfn)) { device = true; + force_pte = true; } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d9117bc56237..fb12d3ef423a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -95,7 +95,7 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; + case PAR_EL1: *val = read_sysreg_par(); break; case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; @@ -1897,9 +1897,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1914,7 +1914,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index e0bf83d556f2..dc8d2a216a6e 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -56,9 +56,8 @@ stp \reg1, \reg2, [\ptr], \val .endm - .weak memcpy SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_PI(memcpy) +SYM_FUNC_START_WEAK_PI(memcpy) #include "copy_template.S" ret SYM_FUNC_END_PI(memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 02cda2e33bde..1035dce4bdaf 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -45,9 +45,8 @@ C_h .req x12 D_l .req x13 D_h .req x14 - .weak memmove SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_PI(memmove) +SYM_FUNC_START_WEAK_PI(memmove) cmp dstin, src b.lo __memcpy add tmp1, src, count diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 77c3c7ba0084..a9c1c9a01ea9 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,9 +42,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 - .weak memset SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_PI(memset) +SYM_FUNC_START_WEAK_PI(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 94c99c1c19e3..1ee94002801f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -262,7 +262,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, local_irq_save(flags); asm volatile("at s1e1r, %0" :: "r" (addr)); isb(); - par = read_sysreg(par_el1); + par = read_sysreg_par(); local_irq_restore(flags); /* diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0784bf3caf43..a4d3c578fbd8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -93,9 +93,10 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y +CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y @@ -378,7 +379,6 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y # CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y @@ -386,7 +386,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y -CONFIG_ZRAM=m +CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m @@ -689,6 +689,7 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m @@ -709,7 +710,6 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -753,6 +753,7 @@ CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_CRC32_SELFTEST=y CONFIG_CRC4=m @@ -829,6 +830,7 @@ CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y CONFIG_FAILSLAB=y CONFIG_FAIL_PAGE_ALLOC=y +CONFIG_FAULT_INJECTION_USERCOPY=y CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 905bc8c4cfaf..17d5df2c1eff 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -87,9 +87,10 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y +CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y @@ -371,7 +372,6 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y # CONFIG_PCIEASPM is not set CONFIG_HOTPLUG_PCI=y @@ -379,7 +379,7 @@ CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y -CONFIG_ZRAM=m +CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m @@ -680,6 +680,7 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m @@ -701,7 +702,6 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -745,6 +745,7 @@ CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC4=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 8f67c55625f9..a302630341ef 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -17,11 +17,11 @@ CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set CONFIG_CRASH_DUMP=y -# CONFIG_SECCOMP is not set # CONFIG_PFAULT is not set # CONFIG_S390_HYPFS_FS is not set # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set +# CONFIG_SECCOMP is not set CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6b8d8c69b1a1..b5dbae78969b 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -692,16 +692,6 @@ static inline int pud_large(pud_t pud) return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } -static inline unsigned long pud_pfn(pud_t pud) -{ - unsigned long origin_mask; - - origin_mask = _REGION_ENTRY_ORIGIN; - if (pud_large(pud)) - origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; - return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; -} - #define pmd_leaf pmd_large static inline int pmd_large(pmd_t pmd) { @@ -747,16 +737,6 @@ static inline int pmd_none(pmd_t pmd) return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY; } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - unsigned long origin_mask; - - origin_mask = _SEGMENT_ENTRY_ORIGIN; - if (pmd_large(pmd)) - origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; - return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; -} - #define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { @@ -1238,11 +1218,39 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) -#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) #define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) +static inline unsigned long pmd_deref(pmd_t pmd) +{ + unsigned long origin_mask; + + origin_mask = _SEGMENT_ENTRY_ORIGIN; + if (pmd_large(pmd)) + origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + return pmd_val(pmd) & origin_mask; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return pmd_deref(pmd) >> PAGE_SHIFT; +} + +static inline unsigned long pud_deref(pud_t pud) +{ + unsigned long origin_mask; + + origin_mask = _REGION_ENTRY_ORIGIN; + if (pud_large(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return pud_val(pud) & origin_mask; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ + return pud_deref(pud) >> PAGE_SHIFT; +} + /* * The pgd_offset function *always* adds the index for the top-level * region/segment table. This is done to get a sequence like the diff --git a/arch/s390/include/asm/vdso/vdso.h b/arch/s390/include/asm/vdso/vdso.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/arch/s390/include/asm/vdso/vdso.h +++ /dev/null diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ece58f2217cb..2012c1cf0853 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -61,14 +61,6 @@ int main(void) BLANK(); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); BLANK(); - /* constants used by the vdso */ - DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); - DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); - BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index ebfe86d097f0..390d97daa2b3 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -855,13 +855,14 @@ void __init smp_detect_cpus(void) static void smp_init_secondary(void) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); set_cpu_flag(CIF_ASCE_PRIMARY); set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); + rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init(); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index d33f21545dfd..9a6bae503fe6 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -101,6 +101,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) if (ret) break; + /* the PCI function will be scanned once function 0 appears */ + if (!zdev->zbus->bus) + break; + pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn); if (!pdev) break; diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index a5e5db6ada3c..39b2eded7bc2 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -164,6 +164,7 @@ void initialize_identity_maps(void *rmode) add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); /* Load the new page-table. */ + sev_verify_cbit(top_level_pgt); write_cr3(top_level_pgt); } diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index dd07e7b41b11..aa561795efd1 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -68,6 +68,9 @@ SYM_FUNC_START(get_sev_encryption_bit) SYM_FUNC_END(get_sev_encryption_bit) .code64 + +#include "../../kernel/sev_verify_cbit.S" + SYM_FUNC_START(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT push %rbp @@ -81,6 +84,19 @@ SYM_FUNC_START(set_sev_encryption_mask) bts %rax, sme_me_mask(%rip) /* Create the encryption mask */ + /* + * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in + * get_sev_encryption_bit() because this function is 32-bit code and + * shared between 64-bit and 32-bit boot path. + */ + movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */ + rdmsr + + /* Store MSR value in sev_status */ + shlq $32, %rdx + orq %rdx, %rax + movq %rax, sev_status(%rip) + .Lno_sev_mask: movq %rbp, %rsp /* Restore original stack pointer */ @@ -96,5 +112,7 @@ SYM_FUNC_END(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 -SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sev_status, .quad 0) +SYM_DATA(sev_check_data, .quad 0) #endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 6d31f1b4c4d1..d9a631c5973c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -159,4 +159,6 @@ void boot_page_fault(void); void boot_stage1_vc(void); void boot_stage2_vc(void); +unsigned long sev_verify_cbit(unsigned long cr3); + #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 40e0e322161d..284e73661a18 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -273,11 +273,15 @@ void __init hv_apic_init(void) pr_info("Hyper-V: Using enlightened APIC (%s mode)", x2apic_enabled() ? "x2apic" : "xapic"); /* - * With x2apic, architectural x2apic MSRs are equivalent to the - * respective synthetic MSRs, so there's no need to override - * the apic accessors. The only exception is - * hv_apic_eoi_write, because it benefits from lazy EOI when - * available, but it works for both xapic and x2apic modes. + * When in x2apic mode, don't use the Hyper-V specific APIC + * accessors since the field layout in the ICR register is + * different in x2apic mode. Furthermore, the architectural + * x2apic MSRs function just as well as the Hyper-V + * synthetic APIC MSRs, so there's no benefit in having + * separate Hyper-V accessors for x2apic mode. The only + * exception is hv_apic_eoi_write, because it benefits from + * lazy EOI when available, but the same accessor works for + * both xapic and x2apic because the field layout is the same. */ apic_set_eoi_write(hv_apic_eoi_write); if (!x2apic_enabled()) { diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 812e9b4c1114..950afebfba88 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -32,6 +32,7 @@ #define KVM_FEATURE_POLL_CONTROL 12 #define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_FEATURE_ASYNC_PF_INT 14 +#define KVM_FEATURE_MSI_EXT_DEST_ID 15 #define KVM_HINTS_REALTIME 0 diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7eb2a1c87969..3c417734790f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -161,6 +161,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) /* Setup early boot stage 4-/5-level pagetables. */ addq phys_base(%rip), %rax + + /* + * For SEV guests: Verify that the C-bit is correct. A malicious + * hypervisor could lie about the C-bit position to perform a ROP + * attack on the guest by writing to the unencrypted stack and wait for + * the next RET instruction. + * %rsi carries pointer to realmode data and is callee-clobbered. Save + * and restore it. + */ + pushq %rsi + movq %rax, %rdi + call sev_verify_cbit + popq %rsi + + /* Switch to new page-table */ movq %rax, %cr3 /* Ensure I am executing from virtual addresses */ @@ -279,6 +294,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" +#include "sev_verify_cbit.S" #ifdef CONFIG_HOTPLUG_CPU /* diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 5f83ccaab877..7d04b356d44d 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -178,6 +178,32 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) goto fail; regs->dx = val >> 32; + /* + * This is a VC handler and the #VC is only raised when SEV-ES is + * active, which means SEV must be active too. Do sanity checks on the + * CPUID results to make sure the hypervisor does not trick the kernel + * into the no-sev path. This could map sensitive data unencrypted and + * make it accessible to the hypervisor. + * + * In particular, check for: + * - Hypervisor CPUID bit + * - Availability of CPUID leaf 0x8000001f + * - SEV CPUID bit. + * + * The hypervisor might still report the wrong C-bit position, but this + * can't be checked here. + */ + + if ((fn == 1 && !(regs->cx & BIT(31)))) + /* Hypervisor bit */ + goto fail; + else if (fn == 0x80000000 && (regs->ax < 0x8000001f)) + /* SEV leaf check */ + goto fail; + else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) + /* SEV bit */ + goto fail; + /* Skip over the CPUID two-byte opcode */ regs->ip += 2; diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 4a96726fbaf8..0bd1a0fc587e 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -374,8 +374,8 @@ fault: return ES_EXCEPTION; } -static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, - unsigned long vaddr, phys_addr_t *paddr) +static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + unsigned long vaddr, phys_addr_t *paddr) { unsigned long va = (unsigned long)vaddr; unsigned int level; @@ -394,15 +394,19 @@ static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, if (user_mode(ctxt->regs)) ctxt->fi.error_code |= X86_PF_USER; - return false; + return ES_EXCEPTION; } + if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) + /* Emulated MMIO to/from encrypted memory not supported */ + return ES_UNSUPPORTED; + pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; pa |= va & ~page_level_mask(level); *paddr = pa; - return true; + return ES_OK; } /* Include code shared with pre-decompression boot stage */ @@ -731,6 +735,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, { u64 exit_code, exit_info_1, exit_info_2; unsigned long ghcb_pa = __pa(ghcb); + enum es_result res; phys_addr_t paddr; void __user *ref; @@ -740,11 +745,12 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; - if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) { - if (!read) + res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); + if (res != ES_OK) { + if (res == ES_EXCEPTION && !read) ctxt->fi.error_code |= X86_PF_WRITE; - return ES_EXCEPTION; + return res; } exit_info_1 = paddr; diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S new file mode 100644 index 000000000000..ee04941a6546 --- /dev/null +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * sev_verify_cbit.S - Code for verification of the C-bit position reported + * by the Hypervisor when running with SEV enabled. + * + * Copyright (c) 2020 Joerg Roedel (jroedel@suse.de) + * + * sev_verify_cbit() is called before switching to a new long-mode page-table + * at boot. + * + * Verify that the C-bit position is correct by writing a random value to + * an encrypted memory location while on the current page-table. Then it + * switches to the new page-table to verify the memory content is still the + * same. After that it switches back to the current page-table and when the + * check succeeded it returns. If the check failed the code invalidates the + * stack pointer and goes into a hlt loop. The stack-pointer is invalidated to + * make sure no interrupt or exception can get the CPU out of the hlt loop. + * + * New page-table pointer is expected in %rdi (first parameter) + * + */ +SYM_FUNC_START(sev_verify_cbit) +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* First check if a C-bit was detected */ + movq sme_me_mask(%rip), %rsi + testq %rsi, %rsi + jz 3f + + /* sme_me_mask != 0 could mean SME or SEV - Check also for SEV */ + movq sev_status(%rip), %rsi + testq %rsi, %rsi + jz 3f + + /* Save CR4 in %rsi */ + movq %cr4, %rsi + + /* Disable Global Pages */ + movq %rsi, %rdx + andq $(~X86_CR4_PGE), %rdx + movq %rdx, %cr4 + + /* + * Verified that running under SEV - now get a random value using + * RDRAND. This instruction is mandatory when running as an SEV guest. + * + * Don't bail out of the loop if RDRAND returns errors. It is better to + * prevent forward progress than to work with a non-random value here. + */ +1: rdrand %rdx + jnc 1b + + /* Store value to memory and keep it in %rdx */ + movq %rdx, sev_check_data(%rip) + + /* Backup current %cr3 value to restore it later */ + movq %cr3, %rcx + + /* Switch to new %cr3 - This might unmap the stack */ + movq %rdi, %cr3 + + /* + * Compare value in %rdx with memory location. If C-bit is incorrect + * this would read the encrypted data and make the check fail. + */ + cmpq %rdx, sev_check_data(%rip) + + /* Restore old %cr3 */ + movq %rcx, %cr3 + + /* Restore previous CR4 */ + movq %rsi, %cr4 + + /* Check CMPQ result */ + je 3f + + /* + * The check failed, prevent any forward progress to prevent ROP + * attacks, invalidate the stack and go into a hlt loop. + */ + xorq %rsp, %rsp + subq $0x1000, %rsp +2: hlt + jmp 2b +3: +#endif + /* Return page-table pointer */ + movq %rdi, %rax + ret +SYM_FUNC_END(sev_verify_cbit) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3c70fb34028b..e19df6cde35d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -793,19 +793,6 @@ static __always_inline unsigned long debug_read_clear_dr6(void) set_debugreg(DR6_RESERVED, 6); dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ - /* - * Clear the virtual DR6 value, ptrace routines will set bits here for - * things we want signals for. - */ - current->thread.virtual_dr6 = 0; - - /* - * The SDM says "The processor clears the BTF flag when it - * generates a debug exception." Clear TIF_BLOCKSTEP to keep - * TIF_BLOCKSTEP in sync with the hardware BTF flag. - */ - clear_thread_flag(TIF_BLOCKSTEP); - return dr6; } @@ -873,6 +860,20 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, */ WARN_ON_ONCE(user_mode(regs)); + if (test_thread_flag(TIF_BLOCKSTEP)) { + /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." but PTRACE_BLOCKSTEP requested + * it for userspace, but we just took a kernel #DB, so re-set + * BTF. + */ + unsigned long debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl |= DEBUGCTLMSR_BTF; + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + } + /* * Catch SYSENTER with TF set and clear DR_STEP. If this hit a * watchpoint at the same time then that will still be handled. @@ -936,6 +937,22 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, instrumentation_begin(); /* + * Start the virtual/ptrace DR6 value with just the DR_STEP mask + * of the real DR6. ptrace_triggered() will set the DR_TRAPn bits. + * + * Userspace expects DR_STEP to be visible in ptrace_get_debugreg(6) + * even if it is not the result of PTRACE_SINGLESTEP. + */ + current->thread.virtual_dr6 = (dr6 & DR_STEP); + + /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." Clear TIF_BLOCKSTEP to keep + * TIF_BLOCKSTEP in sync with the hardware BTF flag. + */ + clear_thread_flag(TIF_BLOCKSTEP); + + /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 17587f496ec7..1f96adff8dc4 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -225,7 +225,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte) { u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; - gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) + gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN) & shadow_nonpresent_or_rsvd_mask; return gpa >> PAGE_SHIFT; @@ -591,15 +591,15 @@ static u64 mmu_spte_get_lockless(u64 *sptep) static u64 restore_acc_track_spte(u64 spte) { u64 new_spte = spte; - u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) - & shadow_acc_track_saved_bits_mask; + u64 saved_bits = (spte >> SHADOW_ACC_TRACK_SAVED_BITS_SHIFT) + & SHADOW_ACC_TRACK_SAVED_BITS_MASK; WARN_ON_ONCE(spte_ad_enabled(spte)); WARN_ON_ONCE(!is_access_track_spte(spte)); new_spte &= ~shadow_acc_track_mask; - new_spte &= ~(shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift); + new_spte &= ~(SHADOW_ACC_TRACK_SAVED_BITS_MASK << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT); new_spte |= saved_bits; return new_spte; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index d9c5665a55e9..fcac2cac78fe 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -55,7 +55,7 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access) mask |= shadow_mmio_value | access; mask |= gpa | shadow_nonpresent_or_rsvd_mask; mask |= (gpa & shadow_nonpresent_or_rsvd_mask) - << shadow_nonpresent_or_rsvd_mask_len; + << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; return mask; } @@ -231,12 +231,12 @@ u64 mark_spte_for_access_track(u64 spte) !spte_can_locklessly_be_made_writable(spte), "kvm: Writable SPTE is not locklessly dirty-trackable\n"); - WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift), + WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT), "kvm: Access Tracking saved bit locations are not zero\n"); - spte |= (spte & shadow_acc_track_saved_bits_mask) << - shadow_acc_track_saved_bits_shift; + spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT; spte &= ~shadow_acc_track_mask; return spte; @@ -245,7 +245,7 @@ u64 mark_spte_for_access_track(u64 spte) void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); - WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); + WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)); WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_access_mask = access_mask; @@ -306,9 +306,9 @@ void kvm_mmu_reset_all_pte_masks(void) low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_has_bug(X86_BUG_L1TF) && !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= - 52 - shadow_nonpresent_or_rsvd_mask_len)) { + 52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) { low_phys_bits = boot_cpu_data.x86_cache_bits - - shadow_nonpresent_or_rsvd_mask_len; + - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; shadow_nonpresent_or_rsvd_mask = rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1); } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 4ecf40e0b8fe..5c75a451c000 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -105,19 +105,19 @@ extern u64 __read_mostly shadow_acc_track_mask; extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; /* + * The number of high-order 1 bits to use in the mask above. + */ +#define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN 5 + +/* * The mask/shift to use for saving the original R/X bits when marking the PTE * as not-present for access tracking purposes. We do not save the W bit as the * PTEs being access tracked also need to be dirty tracked, so the W bit will be * restored only when a write is attempted to the page. */ -static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | - PT64_EPT_EXECUTABLE_MASK; -static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; - -/* - * The number of high-order 1 bits to use in the mask above. - */ -static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +#define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \ + PT64_EPT_EXECUTABLE_MASK) +#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT /* * In some cases, we need to preserve the GFN of a non-present or reserved diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index e5325bd0f304..f3199bb02f22 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -297,14 +297,13 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); -void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) +__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) { vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC; vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; - } #endif diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index e5f7a7ebf27d..bd41d9462355 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -185,7 +185,7 @@ static inline void evmcs_load(u64 phys_addr) vp_ap->enlighten_vmentry = 1; } -void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); +__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); #else /* !IS_ENABLED(CONFIG_HYPERV) */ static inline void evmcs_write64(unsigned long field, u64 value) {} static inline void evmcs_write32(unsigned long field, u32 value) {} @@ -194,7 +194,6 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; } static inline u32 evmcs_read32(unsigned long field) { return 0; } static inline u16 evmcs_read16(unsigned long field) { return 0; } static inline void evmcs_load(u64 phys_addr) {} -static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} static inline void evmcs_touch_msr_bitmap(void) {} #endif /* IS_ENABLED(CONFIG_HYPERV) */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d14c94d0aff1..47b8357b9751 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2560,8 +2560,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmcs_conf->vmexit_ctrl = _vmexit_control; vmcs_conf->vmentry_ctrl = _vmentry_control; - if (static_branch_unlikely(&enable_evmcs)) +#if IS_ENABLED(CONFIG_HYPERV) + if (enlightened_vmcs) evmcs_sanitize_exec_ctrls(vmcs_conf); +#endif return 0; } @@ -6834,7 +6836,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx; - unsigned long *msr_bitmap; int i, cpu, err; BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); @@ -6894,7 +6895,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS); bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS); - msr_bitmap = vmx->vmcs01.msr_bitmap; vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R); vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 397f599b20e5..f5ede41bf9e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -265,13 +265,13 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, if (ignore_msrs) { if (report_ignored_msrs) - vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n", - op, msr, data); + kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", + op, msr, data); /* Mask the error */ return 0; } else { - vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n", - op, msr, data); + kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", + op, msr, data); return -ENOENT; } } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index efbb3de472df..bc0833713be9 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -39,6 +39,7 @@ */ u64 sme_me_mask __section(".data") = 0; u64 sev_status __section(".data") = 0; +u64 sev_check_data __section(".data") = 0; EXPORT_SYMBOL(sme_me_mask); DEFINE_STATIC_KEY_FALSE(sev_enable_key); EXPORT_SYMBOL_GPL(sev_enable_key); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index c6fc83efee0c..8731b7ad9308 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -89,8 +89,8 @@ static void __init free_highpages(void) /* set highmem page free */ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &range_start, &range_end, NULL) { - unsigned long start = PHYS_PFN(range_start); - unsigned long end = PHYS_PFN(range_end); + unsigned long start = PFN_UP(range_start); + unsigned long end = PFN_DOWN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) |