diff options
Diffstat (limited to 'arch')
277 files changed, 3885 insertions, 1320 deletions
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 235ad559acb2..e41eca79c950 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -806,6 +806,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \ logicpd-som-lv-37xx-devkit.dtb \ omap3430-sdp.dtb \ omap3-beagle.dtb \ + omap3-beagle-ab4.dtb \ omap3-beagle-xm.dtb \ omap3-beagle-xm-ab.dtb \ omap3-cm-t3517.dtb \ diff --git a/arch/arm/boot/dts/am335x-wega.dtsi b/arch/arm/boot/dts/am335x-wega.dtsi index 673159d93a6a..f957fea8208e 100644 --- a/arch/arm/boot/dts/am335x-wega.dtsi +++ b/arch/arm/boot/dts/am335x-wega.dtsi @@ -55,7 +55,7 @@ 2 1 0 0 /* # 0: INACTIVE, 1: TX, 2: RX */ >; tx-num-evt = <16>; - rt-num-evt = <16>; + rx-num-evt = <16>; status = "okay"; }; diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 6dde51c2aed3..e4775bbceecc 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -118,7 +118,7 @@ }; pinctrl_fwqspid_default: fwqspid_default { - function = "FWQSPID"; + function = "FWSPID"; groups = "FWQSPID"; }; diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index dff18fc9a906..21294f775a20 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -290,6 +290,7 @@ hvs: hvs@7e400000 { compatible = "brcm,bcm2711-hvs"; + reg = <0x7e400000 0x8000>; interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 6b485cbed8d5..42bff117656c 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -160,7 +160,7 @@ target-module@48210000 { compatible = "ti,sysc-omap4-simple", "ti,sysc"; power-domains = <&prm_mpu>; - clocks = <&mpu_clkctrl DRA7_MPU_CLKCTRL 0>; + clocks = <&mpu_clkctrl DRA7_MPU_MPU_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; @@ -875,10 +875,10 @@ <0x58000014 4>; reg-names = "rev", "syss"; ti,syss-mask = <1>; - clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 0>, - <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 9>, - <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 10>, - <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 11>; + clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 0>, + <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 9>, + <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 10>, + <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 11>; clock-names = "fck", "hdmi_clk", "sys_clk", "tv_clk"; #address-cells = <1>; #size-cells = <1>; @@ -912,7 +912,7 @@ SYSC_OMAP2_SOFTRESET | SYSC_OMAP2_AUTOIDLE)>; ti,syss-mask = <1>; - clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 8>; + clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 8>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; @@ -939,8 +939,8 @@ <SYSC_IDLE_SMART>, <SYSC_IDLE_SMART_WKUP>; ti,sysc-mask = <(SYSC_OMAP4_SOFTRESET)>; - clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 9>, - <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 8>; + clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 9>, + <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 8>; clock-names = "fck", "dss_clk"; #address-cells = <1>; #size-cells = <1>; @@ -979,7 +979,7 @@ compatible = "vivante,gc"; reg = <0x0 0x700>; interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&dss_clkctrl DRA7_BB2D_CLKCTRL 0>; + clocks = <&dss_clkctrl DRA7_DSS_BB2D_CLKCTRL 0>; clock-names = "core"; }; }; @@ -1333,7 +1333,7 @@ ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&wkupaon_clkctrl DRA7_TIMER1_CLKCTRL 24>; + assigned-clocks = <&wkupaon_clkctrl DRA7_WKUPAON_TIMER1_CLKCTRL 24>; assigned-clock-parents = 
<&sys_32k_ck>; }; }; diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts index 8cbaf1c81174..3b609d987d88 100644 --- a/arch/arm/boot/dts/imx23-evk.dts +++ b/arch/arm/boot/dts/imx23-evk.dts @@ -79,7 +79,6 @@ MX23_PAD_LCD_RESET__GPIO_1_18 MX23_PAD_PWM3__GPIO_1_29 MX23_PAD_PWM4__GPIO_1_30 - MX23_PAD_SSP1_DETECT__SSP1_DETECT >; fsl,drive-strength = <MXS_DRIVE_4mA>; fsl,voltage = <MXS_VOLTAGE_HIGH>; diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index d07d8f83456d..ccfa8e320be6 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -5,6 +5,8 @@ * Author: Fabio Estevam <fabio.estevam@freescale.com> */ +#include <dt-bindings/gpio/gpio.h> + / { aliases { backlight = &backlight; @@ -226,6 +228,7 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 + MX6QDL_PAD_SD3_DAT5__GPIO7_IO00 0x1b0b0 >; }; @@ -304,7 +307,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - non-removable; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index b7ea37ad4e55..bcec98b96411 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -259,7 +259,7 @@ interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; clocks = <&pcc2 IMX7ULP_CLK_WDG1>; assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>; - assigned-clocks-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>; + assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>; timeout-sec = <40>; }; diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index 3be7cba603d5..26eaba3fa96f 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -59,7 +59,7 @@ }; uart_A: serial@84c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84c0 0x18>; interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>; fifo-size = <128>; @@ -67,7 +67,7 @@ }; uart_B: serial@84dc { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84dc 0x18>; interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>; status = "disabled"; @@ -105,7 +105,7 @@ }; uart_C: serial@8700 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x8700 0x18>; interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>; status = "disabled"; @@ -228,7 +228,7 @@ }; uart_AO: serial@4c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart"; reg = <0x4c0 0x18>; interrupts = <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>; status = "disabled"; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index f80ddc98d3a2..9997a5d0333a 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -736,27 +736,27 @@ }; &uart_AO { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart", "amlogic,meson-ao-uart"; + clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_A { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = 
<&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_B { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &usb0 { diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index b49b7cbaed4e..94f1c03decce 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -724,27 +724,27 @@ }; &uart_AO { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart", "amlogic,meson-ao-uart"; + clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_A { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_B { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &usb0 { diff --git a/arch/arm/boot/dts/omap3-beagle-ab4.dts b/arch/arm/boot/dts/omap3-beagle-ab4.dts new file mode 100644 index 000000000000..990ff2d84686 --- /dev/null +++ b/arch/arm/boot/dts/omap3-beagle-ab4.dts @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-only +/dts-v1/; + +#include "omap3-beagle.dts" + +/ { + model = "TI OMAP3 BeagleBoard A to B4"; + compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3"; +}; + +/* + * Workaround for capacitor C70 issue, see "Boards revision A and < B5" + * section at https://elinux.org/BeagleBoard_Community + */ + +/* Unusable as clocksource because of unreliable oscillator */ +&counter32k { + status = "disabled"; +}; + +/* Unusable as clockevent because of unreliable oscillator, allow to idle */ +&timer1_target { + /delete-property/ti,no-reset-on-init; + /delete-property/ti,no-idle; + timer@0 { + /delete-property/ti,timer-alwon; + }; +}; + +/* Preferred always-on timer for clocksource */ +&timer12_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + /* Always clocked by secure_32k_fck */ + }; +}; + +/* Preferred timer for clockevent */ +&timer2_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + assigned-clocks = <&gpt2_fck>; + assigned-clock-parents = <&sys_ck>; + }; +}; diff --git 
a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index f9f34b8458e9..0548b391334f 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -304,39 +304,6 @@ phys = <0 &hsusb2_phy>; }; -/* Unusable as clocksource because of unreliable oscillator */ -&counter32k { - status = "disabled"; -}; - -/* Unusable as clockevent because if unreliable oscillator, allow to idle */ -&timer1_target { - /delete-property/ti,no-reset-on-init; - /delete-property/ti,no-idle; - timer@0 { - /delete-property/ti,timer-alwon; - }; -}; - -/* Preferred always-on timer for clocksource */ -&timer12_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - /* Always clocked by secure_32k_fck */ - }; -}; - -/* Preferred timer for clockevent */ -&timer2_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - assigned-clocks = <&gpt2_fck>; - assigned-clock-parents = <&sys_ck>; - }; -}; - &twl_gpio { ti,use-leds; /* pullups: BIT(1) */ diff --git a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi index 5e55198e4576..54cd37336be7 100644 --- a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi +++ b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi @@ -158,6 +158,24 @@ status = "disabled"; }; +/* Unusable as clockevent because if unreliable oscillator, allow to idle */ +&timer1_target { + /delete-property/ti,no-reset-on-init; + /delete-property/ti,no-idle; + timer@0 { + /delete-property/ti,timer-alwon; + }; +}; + +/* Preferred timer for clockevent */ +&timer12_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + /* Always clocked by secure_32k_fck */ + }; +}; + &twl_gpio { ti,use-leds; /* diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index c2995a280729..162d0726b008 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -14,36 +14,3 @@ display2 = &tv0; }; }; - -/* Unusable as clocksource because of unreliable oscillator */ -&counter32k { - status = "disabled"; -}; - -/* Unusable as clockevent because if unreliable oscillator, allow to idle */ -&timer1_target { - /delete-property/ti,no-reset-on-init; - /delete-property/ti,no-idle; - timer@0 { - /delete-property/ti,timer-alwon; - }; -}; - -/* Preferred always-on timer for clocksource */ -&timer12_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - /* Always clocked by secure_32k_fck */ - }; -}; - -/* Preferred timer for clockevent */ -&timer2_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - assigned-clocks = <&gpt2_fck>; - assigned-clock-parents = <&sys_ck>; - }; -}; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 8eed9e3a92e9..5868eb512f69 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -718,8 +718,8 @@ interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>; assigned-clocks = <&cru SCLK_HDMI_PHY>; assigned-clock-parents = <&hdmi_phy>; - clocks = <&cru SCLK_HDMI_HDCP>, <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_CEC>; - clock-names = "isfr", "iahb", "cec"; + clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>; + clock-names = "iahb", "isfr", "cec"; pinctrl-names = "default"; pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>; resets = <&cru SRST_HDMI_P>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index aaaa61875701..45a9d9b908d2 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -971,7 +971,7 @@ status = "disabled"; }; - 
crypto: cypto-controller@ff8a0000 { + crypto: crypto@ff8a0000 { compatible = "rockchip,rk3288-crypto"; reg = <0x0 0xff8a0000 0x0 0x4000>; interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/spear320-hmi.dts b/arch/arm/boot/dts/spear320-hmi.dts index 367ba48aac3e..b587e4ec11e5 100644 --- a/arch/arm/boot/dts/spear320-hmi.dts +++ b/arch/arm/boot/dts/spear320-hmi.dts @@ -235,7 +235,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x41>; - irq-over-gpio; irq-gpios = <&gpiopinctrl 29 0x4>; id = <0>; blocks = <0x5>; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts index 580ca497f312..f8c5899fbdba 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts @@ -185,10 +185,6 @@ cap-sd-highspeed; cap-mmc-highspeed; /* All direction control is used */ - st,sig-dir-cmd; - st,sig-dir-dat0; - st,sig-dir-dat2; - st,sig-dir-dat31; st,sig-pin-fbclk; full-pwr-cycle; vmmc-supply = <&ab8500_ldo_aux3_reg>; diff --git a/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts b/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts index d35fb79d2f51..4db43324dafa 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts +++ b/arch/arm/boot/dts/tegra124-nyan-big-fhd.dts @@ -5,7 +5,13 @@ / { /* Version of Nyan Big with 1080p panel */ - panel { - compatible = "auo,b133htn01"; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "auo,b133htn01"; + }; + }; + }; }; }; diff --git a/arch/arm/boot/dts/tegra124-nyan-big.dts b/arch/arm/boot/dts/tegra124-nyan-big.dts index 1d2aac2cb6d0..fdc1d64dfff9 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big.dts +++ b/arch/arm/boot/dts/tegra124-nyan-big.dts @@ -13,12 +13,15 @@ "google,nyan-big-rev1", "google,nyan-big-rev0", "google,nyan-big", "google,nyan", "nvidia,tegra124"; - panel: panel { - compatible = "auo,b133xtn01"; - - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "auo,b133xtn01"; + backlight = <&backlight>; + }; + }; + }; }; mmc@700b0400 { /* SD Card on this bus */ diff --git a/arch/arm/boot/dts/tegra124-nyan-blaze.dts b/arch/arm/boot/dts/tegra124-nyan-blaze.dts index 677babde6460..abdf4456826f 100644 --- a/arch/arm/boot/dts/tegra124-nyan-blaze.dts +++ b/arch/arm/boot/dts/tegra124-nyan-blaze.dts @@ -15,12 +15,15 @@ "google,nyan-blaze-rev0", "google,nyan-blaze", "google,nyan", "nvidia,tegra124"; - panel: panel { - compatible = "samsung,ltn140at29-301"; - - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "samsung,ltn140at29-301"; + backlight = <&backlight>; + }; + }; + }; }; sound { diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts index 232c90604df9..6a9592ceb5f2 100644 --- a/arch/arm/boot/dts/tegra124-venice2.dts +++ b/arch/arm/boot/dts/tegra124-venice2.dts @@ -48,6 +48,13 @@ dpaux@545c0000 { vdd-supply = <&vdd_3v3_panel>; status = "okay"; + + aux-bus { + panel: panel { + compatible = "lg,lp129qe"; + backlight = <&backlight>; + }; + }; }; }; @@ -1080,13 +1087,6 @@ }; }; - panel: panel { - compatible = "lg,lp129qe"; - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; - }; - vdd_mux: regulator-mux { compatible = "regulator-fixed"; regulator-name = "+VDD_MUX"; diff --git a/arch/arm/crypto/blake2s-shash.c 
b/arch/arm/crypto/blake2s-shash.c index 17c1c3bfe2f5..763c73beea2d 100644 --- a/arch/arm/crypto/blake2s-shash.c +++ b/arch/arm/crypto/blake2s-shash.c @@ -13,12 +13,12 @@ static int crypto_blake2s_update_arm(struct shash_desc *desc, const u8 *in, unsigned int inlen) { - return crypto_blake2s_update(desc, in, inlen, blake2s_compress); + return crypto_blake2s_update(desc, in, inlen, false); } static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) { - return crypto_blake2s_final(desc, out, blake2s_compress); + return crypto_blake2s_final(desc, out, false); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 6fe67963ba5a..aee73ef5b3dc 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -107,6 +107,16 @@ .endm #endif +#if __LINUX_ARM_ARCH__ < 7 + .macro dsb, args + mcr p15, 0, r0, c7, c10, 4 + .endm + + .macro isb, args + mcr p15, 0, r0, c7, c5, 4 + .endm +#endif + .macro asm_trace_hardirqs_off, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) .if \save diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h new file mode 100644 index 000000000000..85f9e538fb32 --- /dev/null +++ b/arch/arm/include/asm/spectre.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SPECTRE_H +#define __ASM_SPECTRE_H + +enum { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +enum { + __SPECTRE_V2_METHOD_BPIALL, + __SPECTRE_V2_METHOD_ICIALLU, + __SPECTRE_V2_METHOD_SMC, + __SPECTRE_V2_METHOD_HVC, + __SPECTRE_V2_METHOD_LOOP8, +}; + +enum { + SPECTRE_V2_METHOD_BPIALL = BIT(__SPECTRE_V2_METHOD_BPIALL), + SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU), + SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC), + SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC), + SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8), +}; + +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES +void spectre_v2_update_state(unsigned int state, unsigned int methods); +#else +static inline void spectre_v2_update_state(unsigned int state, + unsigned int methods) +{} +#endif + +int spectre_bhb_update_vectors(unsigned int method); + +#endif diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index 4a91428c324d..fad45c884e98 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -26,6 +26,19 @@ #define ARM_MMU_DISCARD(x) x #endif +/* + * ld.lld does not support NOCROSSREFS: + * https://github.com/ClangBuiltLinux/linux/issues/1609 + */ +#ifdef CONFIG_LD_IS_LLD +#define NOCROSSREFS +#endif + +/* Set start/end symbol names to the LMA for the section */ +#define ARM_LMA(sym, section) \ + sym##_start = LOADADDR(section); \ + sym##_end = LOADADDR(section) + SIZEOF(section) + #define PROC_INFO \ . = ALIGN(4); \ __proc_info_begin = .; \ @@ -110,19 +123,31 @@ * only thing that matters is their relative offsets */ #define ARM_VECTORS \ - __vectors_start = .; \ - .vectors 0xffff0000 : AT(__vectors_start) { \ - *(.vectors) \ + __vectors_lma = .; \ + OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \ + .vectors { \ + *(.vectors) \ + } \ + .vectors.bhb.loop8 { \ + *(.vectors.bhb.loop8) \ + } \ + .vectors.bhb.bpiall { \ + *(.vectors.bhb.bpiall) \ + } \ } \ - . = __vectors_start + SIZEOF(.vectors); \ - __vectors_end = .; \ + ARM_LMA(__vectors, .vectors); \ + ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8); \ + ARM_LMA(__vectors_bhb_bpiall, .vectors.bhb.bpiall); \ + . 
= __vectors_lma + SIZEOF(.vectors) + \ + SIZEOF(.vectors.bhb.loop8) + \ + SIZEOF(.vectors.bhb.bpiall); \ \ - __stubs_start = .; \ - .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \ + __stubs_lma = .; \ + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \ *(.stubs) \ } \ - . = __stubs_start + SIZEOF(.stubs); \ - __stubs_end = .; \ + ARM_LMA(__stubs, .stubs); \ + . = __stubs_lma + SIZEOF(.stubs); \ \ PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors)); diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ae295a3bcfef..6ef3b535b7bf 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -106,4 +106,6 @@ endif obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 5cd057859fe9..ee3f7a599181 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1002,12 +1002,11 @@ vector_\name: sub lr, lr, #\correction .endif - @ - @ Save r0, lr_<exception> (parent PC) and spsr_<exception> - @ (parent CPSR) - @ + @ Save r0, lr_<exception> (parent PC) stmia sp, {r0, lr} @ save r0, lr - mrs lr, spsr + + @ Save spsr_<exception> (parent CPSR) +2: mrs lr, spsr str lr, [sp, #8] @ save spsr @ @@ -1028,6 +1027,44 @@ vector_\name: movs pc, lr @ branch to handler in SVC mode ENDPROC(vector_\name) +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .subsection 1 + .align 5 +vector_bhb_loop8_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_<exception> (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mov r0, #8 +3: b . + 4 + subs r0, r0, #1 + bne 3b + dsb + isb + b 2b +ENDPROC(vector_bhb_loop8_\name) + +vector_bhb_bpiall_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_<exception> (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mcr p15, 0, r0, c7, c5, 6 @ BPIALL + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". + b 2b +ENDPROC(vector_bhb_bpiall_\name) + .previous +#endif + .align 2 @ handler addresses follow this label 1: @@ -1036,6 +1073,10 @@ ENDPROC(vector_\name) .section .stubs, "ax", %progbits @ This must be the first word .word vector_swi +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .word vector_bhb_loop8_swi + .word vector_bhb_bpiall_swi +#endif vector_rst: ARM( swi SYS_ERROR0 ) @@ -1150,8 +1191,10 @@ vector_addrexcptn: * FIQ "NMI" handler *----------------------------------------------------------------------------- * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86 - * systems. + * systems. This must be the last vector stub, so lets place it in its own + * subsection. 
*/ + .subsection 2 vector_stub fiq, FIQ_MODE, 4 .long __fiq_usr @ 0 (USR_26 / USR_32) @@ -1184,6 +1227,30 @@ vector_addrexcptn: W(b) vector_irq W(b) vector_fiq +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .section .vectors.bhb.loop8, "ax", %progbits +.L__vectors_bhb_loop8_start: + W(b) vector_rst + W(b) vector_bhb_loop8_und + W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004 + W(b) vector_bhb_loop8_pabt + W(b) vector_bhb_loop8_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_loop8_irq + W(b) vector_bhb_loop8_fiq + + .section .vectors.bhb.bpiall, "ax", %progbits +.L__vectors_bhb_bpiall_start: + W(b) vector_rst + W(b) vector_bhb_bpiall_und + W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008 + W(b) vector_bhb_bpiall_pabt + W(b) vector_bhb_bpiall_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_bpiall_irq + W(b) vector_bhb_bpiall_fiq +#endif + .data .align 2 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index ac86c34682bb..dbc1913ee30b 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -154,12 +154,36 @@ ENDPROC(ret_from_fork) */ .align 5 +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +ENTRY(vector_bhb_loop8_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mov r8, #8 +1: b 2f +2: subs r8, r8, #1 + bne 1b + dsb + isb + b 3f +ENDPROC(vector_bhb_loop8_swi) + + .align 5 +ENTRY(vector_bhb_bpiall_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mcr p15, 0, r8, c7, c5, 6 @ BPIALL + isb + b 3f +ENDPROC(vector_bhb_bpiall_swi) +#endif + .align 5 ENTRY(vector_swi) #ifdef CONFIG_CPU_V7M v7m_exception_entry #else sub sp, sp, #PT_REGS_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 +3: ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr THUMB( mov r8, sp ) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 7bd30c0a4280..22f937e6f3ff 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) return 0; } -static struct undef_hook kgdb_brkpt_hook = { +static struct undef_hook kgdb_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; -static struct undef_hook kgdb_compiled_brkpt_hook = { +static struct undef_hook kgdb_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_BREAKINST & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_brk_fn +}; + +static struct undef_hook kgdb_compiled_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; +static struct undef_hook kgdb_compiled_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_COMPILED_BREAK & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_compiled_brk_fn +}; + static int __kgdb_notify(struct die_args *args, unsigned long cmd) { struct pt_regs *regs = args->regs; @@ -210,8 +226,10 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_undef_hook(&kgdb_brkpt_hook); - register_undef_hook(&kgdb_compiled_brkpt_hook); + register_undef_hook(&kgdb_brkpt_arm_hook); + register_undef_hook(&kgdb_brkpt_thumb_hook); + register_undef_hook(&kgdb_compiled_brkpt_arm_hook); + register_undef_hook(&kgdb_compiled_brkpt_thumb_hook); return 0; } @@ -224,8 +242,10 @@ int kgdb_arch_init(void) */ void 
kgdb_arch_exit(void) { - unregister_undef_hook(&kgdb_brkpt_hook); - unregister_undef_hook(&kgdb_compiled_brkpt_hook); + unregister_undef_hook(&kgdb_brkpt_arm_hook); + unregister_undef_hook(&kgdb_brkpt_thumb_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c new file mode 100644 index 000000000000..0dcefc36fb7a --- /dev/null +++ b/arch/arm/kernel/spectre.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/bpf.h> +#include <linux/cpu.h> +#include <linux/device.h> + +#include <asm/spectre.h> + +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false; +#endif +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +static unsigned int spectre_v2_state; +static unsigned int spectre_v2_methods; + +void spectre_v2_update_state(unsigned int state, unsigned int method) +{ + if (state > spectre_v2_state) + spectre_v2_state = state; + spectre_v2_methods |= method; +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const char *method; + + if (spectre_v2_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "%s\n", "Not affected"); + + if (spectre_v2_state != SPECTRE_MITIGATED) + return sprintf(buf, "%s\n", "Vulnerable"); + + if (_unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + switch (spectre_v2_methods) { + case SPECTRE_V2_METHOD_BPIALL: + method = "Branch predictor hardening"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + method = "I-cache invalidation"; + break; + + case SPECTRE_V2_METHOD_SMC: + case SPECTRE_V2_METHOD_HVC: + method = "Firmware call"; + break; + + case SPECTRE_V2_METHOD_LOOP8: + method = "History overwrite"; + break; + + default: + method = "Multiple mitigations"; + break; + } + + return sprintf(buf, "Mitigation: %s\n", method); +} diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index da04ed85855a..cae4a748811f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -30,6 +30,7 @@ #include <linux/atomic.h> #include <asm/cacheflush.h> #include <asm/exception.h> +#include <asm/spectre.h> #include <asm/unistd.h> #include <asm/traps.h> #include <asm/ptrace.h> @@ -789,10 +790,59 @@ static inline void __init kuser_init(void *vectors) } #endif +#ifndef CONFIG_CPU_V7M +static void copy_from_lma(void *vma, void *lma_start, void *lma_end) +{ + memcpy(vma, lma_start, lma_end - lma_start); +} + +static void flush_vectors(void *vma, size_t offset, size_t size) +{ + unsigned long start = (unsigned long)vma + offset; + unsigned long end = start + size; + + flush_icache_range(start, end); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +int spectre_bhb_update_vectors(unsigned int method) +{ + extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[]; + extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[]; + void *vec_start, *vec_end; + + if (system_state >= SYSTEM_FREEING_INITMEM) { + pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n", + smp_processor_id()); + return SPECTRE_VULNERABLE; + } + + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + vec_start = __vectors_bhb_loop8_start; + vec_end = __vectors_bhb_loop8_end; + break; + + 
case SPECTRE_V2_METHOD_BPIALL: + vec_start = __vectors_bhb_bpiall_start; + vec_end = __vectors_bhb_bpiall_end; + break; + + default: + pr_err("CPU%u: unknown Spectre BHB state %d\n", + smp_processor_id(), method); + return SPECTRE_VULNERABLE; + } + + copy_from_lma(vectors_page, vec_start, vec_end); + flush_vectors(vectors_page, 0, vec_end - vec_start); + + return SPECTRE_MITIGATED; +} +#endif + void __init early_trap_init(void *vectors_base) { -#ifndef CONFIG_CPU_V7M - unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; unsigned i; @@ -813,17 +863,20 @@ void __init early_trap_init(void *vectors_base) * into the vector page, mapped at 0xffff0000, and ensure these * are visible to the instruction stream. */ - memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); - memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); + copy_from_lma(vectors_base, __vectors_start, __vectors_end); + copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end); kuser_init(vectors_base); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); + flush_vectors(vectors_base, 0, PAGE_SIZE * 2); +} #else /* ifndef CONFIG_CPU_V7M */ +void __init early_trap_init(void *vectors_base) +{ /* * on V7-M there is no need to copy the vector table to a dedicated * memory area. The address is configurable and so a table in the kernel * image can be used. */ -#endif } +#endif diff --git a/arch/arm/mach-mstar/Kconfig b/arch/arm/mach-mstar/Kconfig index cd300eeedc20..0bf4d312bcfd 100644 --- a/arch/arm/mach-mstar/Kconfig +++ b/arch/arm/mach-mstar/Kconfig @@ -3,6 +3,7 @@ menuconfig ARCH_MSTARV7 depends on ARCH_MULTI_V7 select ARM_GIC select ARM_HEAVY_MB + select HAVE_ARM_ARCH_TIMER select MST_IRQ select MSTAR_MSC313_MPLL help diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 6daaa645ae5d..21413a9b7b6c 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -263,9 +263,9 @@ static int __init omapdss_init_of(void) } r = of_platform_populate(node, NULL, NULL, &pdev->dev); + put_device(&pdev->dev); if (r) { pr_err("Unable to populate DSS submodule devices\n"); - put_device(&pdev->dev); return r; } diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index ccb0e3732c0d..31d1a21f6041 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -752,8 +752,10 @@ static int __init _init_clkctrl_providers(void) for_each_matching_node(np, ti_clkctrl_match_table) { ret = _setup_clkctrl_provider(np); - if (ret) + if (ret) { + of_node_put(np); break; + } } return ret; diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig index 43ddec677c0b..594edf9bbea4 100644 --- a/arch/arm/mach-socfpga/Kconfig +++ b/arch/arm/mach-socfpga/Kconfig @@ -2,6 +2,7 @@ menuconfig ARCH_INTEL_SOCFPGA bool "Altera SOCFPGA family" depends on ARCH_MULTI_V7 + select ARCH_HAS_RESET_CONTROLLER select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC @@ -18,6 +19,7 @@ menuconfig ARCH_INTEL_SOCFPGA select PL310_ERRATA_727915 select PL310_ERRATA_753970 if PL310 select PL310_ERRATA_769419 + select RESET_CONTROLLER if ARCH_INTEL_SOCFPGA config SOCFPGA_SUSPEND diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 58afba346729..9724c16e9076 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -830,6 +830,7 @@ config CPU_BPREDICT_DISABLE config CPU_SPECTRE bool + select 
GENERIC_CPU_VULNERABILITIES config HARDEN_BRANCH_PREDICTOR bool "Harden the branch predictor against aliasing attacks" if EXPERT @@ -850,6 +851,16 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. +config HARDEN_BRANCH_HISTORY + bool "Harden Spectre style attacks against branch history" if EXPERT + depends on CPU_SPECTRE + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. When + taking an exception, a sequence of branches overwrites the branch + history, or branch history is invalidated. + config TLS_REG_EMUL bool select NEED_KUSER_HELPERS diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 274e4f73fd33..5e2be37a198e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -212,12 +212,14 @@ early_param("ecc", early_ecc); static int __init early_cachepolicy(char *p) { pr_warn("cachepolicy kernel parameter not supported without cp15\n"); + return 0; } early_param("cachepolicy", early_cachepolicy); static int __init noalign_setup(char *__unused) { pr_warn("noalign kernel parameter not supported without cp15\n"); + return 1; } __setup("noalign", noalign_setup); diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 114c05ab4dd9..06dbfb968182 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -6,8 +6,35 @@ #include <asm/cp15.h> #include <asm/cputype.h> #include <asm/proc-fns.h> +#include <asm/spectre.h> #include <asm/system_misc.h> +#ifdef CONFIG_ARM_PSCI +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + + default: + return SPECTRE_VULNERABLE; + } +} +#else +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + return SPECTRE_VULNERABLE; +} +#endif + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); @@ -36,13 +63,61 @@ static void __maybe_unused call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void cpu_v7_spectre_init(void) +static unsigned int spectre_v2_install_workaround(unsigned int method) { const char *spectre_v2_method = NULL; int cpu = smp_processor_id(); if (per_cpu(harden_branch_predictor_fn, cpu)) - return; + return SPECTRE_MITIGATED; + + switch (method) { + case SPECTRE_V2_METHOD_BPIALL: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_bpiall; + spectre_v2_method = "BPIALL"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_iciallu; + spectre_v2_method = "ICIALLU"; + break; + + case SPECTRE_V2_METHOD_HVC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_hvc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; + spectre_v2_method = "hypervisor"; + break; + + case SPECTRE_V2_METHOD_SMC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_smc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; + spectre_v2_method = "firmware"; + break; + } + + if (spectre_v2_method) + pr_info("CPU%u: Spectre v2: using %s workaround\n", + smp_processor_id(), spectre_v2_method); + + return SPECTRE_MITIGATED; +} +#else +static unsigned int spectre_v2_install_workaround(unsigned int method) +{ + pr_info("CPU%u: Spectre 
V2: workarounds disabled by configuration\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; +} +#endif + +static void cpu_v7_spectre_v2_init(void) +{ + unsigned int state, method = 0; switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A8: @@ -51,69 +126,133 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_bpiall; - spectre_v2_method = "BPIALL"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; break; case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_iciallu; - spectre_v2_method = "ICIALLU"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_ICIALLU; break; -#ifdef CONFIG_ARM_PSCI case ARM_CPU_PART_BRAHMA_B53: /* Requires no workaround */ + state = SPECTRE_UNAFFECTED; break; + default: /* Other ARM CPUs require no workaround */ - if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) + if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) { + state = SPECTRE_UNAFFECTED; break; + } + fallthrough; - /* Cortex A57/A72 require firmware workaround */ - case ARM_CPU_PART_CORTEX_A57: - case ARM_CPU_PART_CORTEX_A72: { - struct arm_smccc_res res; - arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - return; + /* Cortex A57/A72 require firmware workaround */ + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = spectre_v2_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + break; switch (arm_smccc_1_1_get_conduit()) { case SMCCC_CONDUIT_HVC: - per_cpu(harden_branch_predictor_fn, cpu) = - call_hvc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_hvc_switch_mm; - spectre_v2_method = "hypervisor"; + method = SPECTRE_V2_METHOD_HVC; break; case SMCCC_CONDUIT_SMC: - per_cpu(harden_branch_predictor_fn, cpu) = - call_smc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_smc_switch_mm; - spectre_v2_method = "firmware"; + method = SPECTRE_V2_METHOD_SMC; break; default: + state = SPECTRE_VULNERABLE; break; } } -#endif + + if (state == SPECTRE_MITIGATED) + state = spectre_v2_install_workaround(method); + + spectre_v2_update_state(state, method); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +static int spectre_bhb_method; + +static const char *spectre_bhb_method_name(int method) +{ + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + return "loop"; + + case SPECTRE_V2_METHOD_BPIALL: + return "BPIALL"; + + default: + return "unknown"; } +} - if (spectre_v2_method) - pr_info("CPU%u: Spectre v2: using %s workaround\n", - smp_processor_id(), spectre_v2_method); +static int spectre_bhb_install_workaround(int method) +{ + if (spectre_bhb_method != method) { + if (spectre_bhb_method) { + pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; + } + + if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE) + return SPECTRE_VULNERABLE; + + spectre_bhb_method = method; + } + + pr_info("CPU%u: Spectre BHB: using %s workaround\n", + smp_processor_id(), spectre_bhb_method_name(method)); + + return SPECTRE_MITIGATED; } #else -static void cpu_v7_spectre_init(void) +static int spectre_bhb_install_workaround(int method) { + return SPECTRE_VULNERABLE; } #endif +static void cpu_v7_spectre_bhb_init(void) +{ + unsigned int state, method = 0; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A15: + case 
ARM_CPU_PART_BRAHMA_B15: + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_LOOP8; + break; + + case ARM_CPU_PART_CORTEX_A73: + case ARM_CPU_PART_CORTEX_A75: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; + break; + + default: + state = SPECTRE_UNAFFECTED; + break; + } + + if (state == SPECTRE_MITIGATED) + state = spectre_bhb_install_workaround(method); + + spectre_v2_update_state(state, method); +} + static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned, u32 mask, const char *msg) { @@ -142,16 +281,17 @@ static bool check_spectre_auxcr(bool *warned, u32 bit) void cpu_v7_ca8_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_bugs_init(void) { - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f2b5a4abef21..c842878f8133 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -672,6 +672,7 @@ config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE config ARM64_ERRATUM_2051678 bool "Cortex-A510: 2051678: disable Hardware Update of the page table dirty bit" + default y help This options adds the workaround for ARM Cortex-A510 erratum ARM64_ERRATUM_2051678. Affected Coretex-A510 might not respect the ordering rules for @@ -680,6 +681,22 @@ config ARM64_ERRATUM_2051678 If unsure, say Y. +config ARM64_ERRATUM_2077057 + bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2" + help + This option adds the workaround for ARM Cortex-A510 erratum 2077057. + Affected Cortex-A510 may corrupt SPSR_EL2 when the a step exception is + expected, but a Pointer Authentication trap is taken instead. The + erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow + EL1 to cause a return to EL2 with a guest controlled ELR_EL2. + + This can only happen when EL2 is stepping EL1. + + When these conditions occur, the SPSR_EL2 value is unchanged from the + previous guest entry, and can be restored from the in-memory copy. + + If unsure, say Y. + config ARM64_ERRATUM_2119858 bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode" default y @@ -1235,9 +1252,6 @@ config HW_PERF_EVENTS def_bool y depends on ARM_PMU -config ARCH_HAS_FILTER_PGPROT - def_bool y - # Supported by clang >= 7.0 config CC_HAVE_SHADOW_CALL_STACK def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) @@ -1366,6 +1380,15 @@ config UNMAP_KERNEL_AT_EL0 If unsure, say Y. +config MITIGATE_SPECTRE_BRANCH_HISTORY + bool "Mitigate Spectre style attacks against branch history" if EXPERT + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. + When taking an exception from user-space, a sequence of branches + or a firmware call overwrites the branch history. + config RODATA_FULL_DEFAULT_ENABLED bool "Apply r/o permissions of VM areas also to their linear aliases" default y diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 7d5d58800170..21697449d762 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -309,9 +309,6 @@ config ARCH_VISCONTI help This enables support for Toshiba Visconti SoCs Family. 
-config ARCH_VULCAN - def_bool n - config ARCH_XGENE bool "AppliedMicro X-Gene SOC Family" help diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 517519e6e87f..f84d4b489e0b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -107,6 +107,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts index d8838dde0f0f..4fb31c2ba31c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts @@ -157,14 +157,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi index 3e968b244191..fd3fa82e4c33 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi @@ -17,7 +17,7 @@ rtc1 = &vrtc; }; - dioo2133: audio-amplifier-0 { + dio2133: audio-amplifier-0 { compatible = "simple-audio-amplifier"; enable-gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_HIGH>; VCC-supply = <&vcc_5v>; @@ -219,7 +219,7 @@ audio-widgets = "Line", "Lineout"; audio-aux-devs = <&tdmout_b>, <&tdmout_c>, <&tdmin_a>, <&tdmin_b>, <&tdmin_c>, <&tdmin_lb>, - <&dioo2133>; + <&dio2133>; audio-routing = "TDMOUT_B IN 0", "FRDDR_A OUT 1", "TDMOUT_B IN 1", "FRDDR_B OUT 1", "TDMOUT_B IN 2", "FRDDR_C OUT 1", diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 6b457b2c30a4..aa14ea017a61 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -49,6 +49,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts index 212c6aa5a3b8..5751c48620ed 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts @@ -123,7 +123,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; - enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>; + enable-gpio = <&gpio_ao GPIOE_2 GPIO_ACTIVE_HIGH>; enable-active-high; regulator-always-on; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi index 0bd1e98a0eef..ddb1b345397f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi @@ -48,7 +48,7 @@ regulator-max-microvolt = <3300000>; vin-supply = <&vcc_5v>; - enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>; + enable-gpio = <&gpio_ao GPIOE_2 GPIO_OPEN_DRAIN>; enable-active-high; regulator-always-on; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts 
index 427475846fc7..a5d79f2f7c19 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -203,14 +203,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 6288e104a089..a2635b14da30 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -543,8 +543,7 @@ <0x02000000 0x00 0x50000000 0x00 0x50000000 0x0 0x08000000>, <0x42000000 0x40 0x00000000 0x40 0x00000000 0x1 0x00000000>; /* Standard AXI Translation entries as programmed by EDK2 */ - dma-ranges = <0x02000000 0x0 0x2c1c0000 0x0 0x2c1c0000 0x0 0x00040000>, - <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>, + dma-ranges = <0x02000000 0x0 0x80000000 0x0 0x80000000 0x0 0x80000000>, <0x43000000 0x8 0x00000000 0x8 0x00000000 0x2 0x00000000>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index d74e738e4070..c03f4e183389 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -157,6 +157,10 @@ }; }; +&ftm_alarm0 { + status = "okay"; +}; + &gpio1 { gpio-line-names = "", "", "", "", "", "", "", "", diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index f77f90ed416f..0c7a72c51a31 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -707,7 +707,6 @@ clocks = <&clk IMX8MM_CLK_VPU_DEC_ROOT>; assigned-clocks = <&clk IMX8MM_CLK_VPU_BUS>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_800M>; - resets = <&src IMX8MQ_RESET_VPU_RESET>; }; pgc_vpu_g1: power-domain@7 { diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi index f3e3418f7edc..2d4a472af6a9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi @@ -1115,8 +1115,8 @@ status = "okay"; ports { - port@1 { - reg = <1>; + port@0 { + reg = <0>; mipi1_sensor_ep: endpoint { remote-endpoint = <&camera1_ep>; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 2df2510d0118..e92ebb6147e6 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -554,7 +554,7 @@ assigned-clock-rates = <0>, <0>, <0>, <594000000>; status = "disabled"; - port@0 { + port { lcdif_mipi_dsi: endpoint { remote-endpoint = <&mipi_dsi_lcdif_in>; }; @@ -1151,8 +1151,8 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { - reg = <0>; + port@1 { + reg = <1>; csi1_mipi_ep: endpoint { remote-endpoint = <&csi1_ep>; @@ -1203,8 +1203,8 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { - reg = <0>; + port@1 { + reg = <1>; csi2_mipi_ep: endpoint { remote-endpoint = <&csi2_ep>; diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index a987ff7156bd..09f7364dd1d0 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -132,7 +132,7 @@ scmi_sensor: protocol@15 { reg 
= <0x15>; - #thermal-sensor-cells = <0>; + #thermal-sensor-cells = <1>; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/mba8mx.dtsi b/arch/arm64/boot/dts/freescale/mba8mx.dtsi index f27e3c8de916..ce6d5bdba0a8 100644 --- a/arch/arm64/boot/dts/freescale/mba8mx.dtsi +++ b/arch/arm64/boot/dts/freescale/mba8mx.dtsi @@ -91,7 +91,7 @@ sound { compatible = "fsl,imx-audio-tlv320aic32x4"; - model = "tqm-tlv320aic32"; + model = "imx-audio-tlv320aic32x4"; ssi-controller = <&sai3>; audio-codec = <&tlv320aic3x04>; }; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 0dd2d2ee765a..f4270cf18996 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -502,7 +502,7 @@ }; usb0: usb@ffb00000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb00000 0x40000>; interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0>; @@ -515,7 +515,7 @@ }; usb1: usb@ffb40000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb40000 0x40000>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index 04da07ae4420..1cee26479bfe 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -18,6 +18,7 @@ aliases { spi0 = &spi0; + ethernet0 = ð0; ethernet1 = ð1; mmc0 = &sdhci0; mmc1 = &sdhci1; @@ -138,7 +139,9 @@ /* * U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property * contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and - * 2 size cells and also expects that the second range starts at 16 MB offset. If these + * 2 size cells and also expects that the second range starts at 16 MB offset. Also it + * expects that first range uses same address for PCI (child) and CPU (parent) cells (so + * no remapping) and that this address is the lowest from all specified ranges. If these * conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address * space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window * for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB. @@ -147,6 +150,9 @@ * https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7 * https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf * https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33 + * Bug related to requirement of same child and parent addresses for first range is fixed + * in U-Boot version 2022.04 by following commit: + * https://source.denx.de/u-boot/u-boot/-/commit/1fd54253bca7d43d046bba4853fe5fafd034bc17 */ #address-cells = <3>; #size-cells = <2>; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index 673f4906eef9..fb78ef613b29 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -499,7 +499,7 @@ * (totaling 127 MiB) for MEM. 
*/ ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x07f00000 /* Port 0 MEM */ - 0x81000000 0 0xefff0000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */ + 0x81000000 0 0x00000000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */ interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc 0>, <0 0 0 2 &pcie_intc 1>, diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 2d48c3715fc6..aaa00da5351d 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1584,7 +1584,7 @@ #iommu-cells = <1>; nvidia,memory-controller = <&mc>; - status = "okay"; + status = "disabled"; }; smmu: iommu@12000000 { diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index 58845a14805f..e2b9ec134cb1 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -807,3 +807,8 @@ qcom,snoc-host-cap-8bit-quirk; }; + +&crypto { + /* FIXME: qce_start triggers an SError */ + status= "disable"; +}; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 53b39e718fb6..4b19744bcfb3 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -35,6 +35,24 @@ clock-frequency = <32000>; #clock-cells = <0>; }; + + ufs_phy_rx_symbol_0_clk: ufs-phy-rx-symbol-0 { + compatible = "fixed-clock"; + clock-frequency = <1000>; + #clock-cells = <0>; + }; + + ufs_phy_rx_symbol_1_clk: ufs-phy-rx-symbol-1 { + compatible = "fixed-clock"; + clock-frequency = <1000>; + #clock-cells = <0>; + }; + + ufs_phy_tx_symbol_0_clk: ufs-phy-tx-symbol-0 { + compatible = "fixed-clock"; + clock-frequency = <1000>; + #clock-cells = <0>; + }; }; cpus { @@ -603,9 +621,9 @@ <0>, <0>, <0>, - <0>, - <0>, - <0>, + <&ufs_phy_rx_symbol_0_clk>, + <&ufs_phy_rx_symbol_1_clk>, + <&ufs_phy_tx_symbol_0_clk>, <0>, <0>; }; @@ -1923,8 +1941,8 @@ <75000000 300000000>, <0 0>, <0 0>, - <75000000 300000000>, - <75000000 300000000>; + <0 0>, + <0 0>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 10c25ad2d0c7..02b97e838c47 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -726,7 +726,7 @@ compatible = "qcom,sm8450-smmu-500", "arm,mmu-500"; reg = <0 0x15000000 0 0x100000>; #iommu-cells = <2>; - #global-interrupts = <2>; + #global-interrupts = <1>; interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>, @@ -813,6 +813,7 @@ <GIC_SPI 412 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 707 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 690 IRQ_TYPE_LEVEL_HIGH>, @@ -1072,9 +1073,10 @@ <&gcc GCC_USB30_PRIM_MASTER_CLK>, <&gcc GCC_AGGRE_USB3_PRIM_AXI_CLK>, <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>, - <&gcc GCC_USB30_PRIM_SLEEP_CLK>; + <&gcc GCC_USB30_PRIM_SLEEP_CLK>, + <&gcc GCC_USB3_0_CLKREF_EN>; clock-names = "cfg_noc", "core", "iface", "mock_utmi", - "sleep"; + "sleep", "xo"; assigned-clocks = <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>, <&gcc GCC_USB30_PRIM_MASTER_CLK>; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index f972704dfe7a..56dfbb2e2fa6 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -711,7 +711,7 @@ clock-names = 
"pclk", "timer"; }; - dmac: dmac@ff240000 { + dmac: dma-controller@ff240000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xff240000 0x0 0x4000>; interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 39db0b85b4da..b822533dc7f1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -489,7 +489,7 @@ status = "disabled"; }; - dmac: dmac@ff1f0000 { + dmac: dma-controller@ff1f0000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xff1f0000 0x0 0x4000>; interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 45a5ae5d2027..162f08bca0d4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -286,7 +286,7 @@ sound: sound { compatible = "rockchip,rk3399-gru-sound"; - rockchip,cpu = <&i2s0 &i2s2>; + rockchip,cpu = <&i2s0 &spdif>; }; }; @@ -437,10 +437,6 @@ ap_i2c_audio: &i2c8 { status = "okay"; }; -&i2s2 { - status = "okay"; -}; - &io_domains { status = "okay"; @@ -537,6 +533,17 @@ ap_i2c_audio: &i2c8 { vqmmc-supply = <&ppvar_sd_card_io>; }; +&spdif { + status = "okay"; + + /* + * SPDIF is routed internally to DP; we either don't use these pins, or + * mux them to something else. + */ + /delete-property/ pinctrl-0; + /delete-property/ pinctrl-names; +}; + &spi1 { status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 292bb7e80cf3..3ae5d727e367 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -232,6 +232,7 @@ &usbdrd_dwc3_0 { dr_mode = "otg"; + extcon = <&extcon_usb3>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index fb67db4619ea..08fa00364b42 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -25,6 +25,13 @@ }; }; + extcon_usb3: extcon-usb3 { + compatible = "linux,extcon-usb-gpio"; + id-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&usb3_id>; + }; + clkin_gmac: external-gmac-clock { compatible = "fixed-clock"; clock-frequency = <125000000>; @@ -422,9 +429,22 @@ <4 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>; }; }; + + usb3 { + usb3_id: usb3-id { + rockchip,pins = + <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; }; &sdhci { + /* + * Signal integrity isn't great at 200MHz but 100MHz has proven stable + * enough. 
+ */ + max-frequency = <100000000>; + bus-width = <8>; mmc-hs400-1_8v; mmc-hs400-enhanced-strobe; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index d3cdf6f42a30..080457a68e3c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1881,10 +1881,10 @@ interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH 0>; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, - <&cru PLL_VPLL>, + <&cru SCLK_HDMI_CEC>, <&cru PCLK_VIO_GRF>, - <&cru SCLK_HDMI_CEC>; - clock-names = "iahb", "isfr", "vpll", "grf", "cec"; + <&cru PLL_VPLL>; + clock-names = "iahb", "isfr", "cec", "grf", "vpll"; power-domains = <&power RK3399_PD_HDCP>; reg-io-width = <4>; rockchip,grf = <&grf>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 166399b7f13f..d9eb92d59099 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -285,8 +285,6 @@ vcc_ddr: DCDC_REG3 { regulator-always-on; regulator-boot-on; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; regulator-initial-mode = <0x2>; regulator-name = "vcc_ddr"; regulator-state-mem { diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index 2fd313a295f8..d91df1cde736 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -32,13 +32,11 @@ clocks = <&cru SCLK_GMAC0>, <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_MAC0_REFOUT>, <&cru ACLK_GMAC0>, <&cru PCLK_GMAC0>, - <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_GMAC0_PTP_REF>, - <&cru PCLK_XPCS>; + <&cru SCLK_GMAC0_RX_TX>, <&cru CLK_GMAC0_PTP_REF>; clock-names = "stmmaceth", "mac_clk_rx", "mac_clk_tx", "clk_mac_refout", "aclk_mac", "pclk_mac", - "clk_mac_speed", "ptp_ref", - "pclk_xpcs"; + "clk_mac_speed", "ptp_ref"; resets = <&cru SRST_A_GMAC0>; reset-names = "stmmaceth"; rockchip,grf = <&grf>; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index a68033a23975..8ccce54ee8e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -651,7 +651,7 @@ status = "disabled"; }; - dmac0: dmac@fe530000 { + dmac0: dma-controller@fe530000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xfe530000 0x0 0x4000>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, @@ -662,7 +662,7 @@ #dma-cells = <1>; }; - dmac1: dmac@fe550000 { + dmac1: dma-controller@fe550000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x0 0xfe550000 0x0 0x4000>; interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts index a5a24f9f46c5..b210cc07c539 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts +++ b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts @@ -15,8 +15,18 @@ model = "Texas Instruments J721S2 EVM"; chosen { - stdout-path = "serial10:115200n8"; - bootargs = "console=ttyS10,115200n8 earlycon=ns16550a,mmio32,2880000"; + stdout-path = "serial2:115200n8"; + bootargs = "console=ttyS2,115200n8 earlycon=ns16550a,mmio32,2880000"; + }; + + aliases { + serial1 = &mcu_uart0; + serial2 = &main_uart8; + mmc0 = &main_sdhci0; + mmc1 = &main_sdhci1; + can0 = &main_mcan16; + can1 = &mcu_mcan0; + can2 = &mcu_mcan1; }; evm_12v0: fixedregulator-evm12v0 { diff --git 
a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi index 80d3cae03e88..fe5234c40f6c 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi @@ -21,28 +21,6 @@ #address-cells = <2>; #size-cells = <2>; - aliases { - serial0 = &wkup_uart0; - serial1 = &mcu_uart0; - serial2 = &main_uart0; - serial3 = &main_uart1; - serial4 = &main_uart2; - serial5 = &main_uart3; - serial6 = &main_uart4; - serial7 = &main_uart5; - serial8 = &main_uart6; - serial9 = &main_uart7; - serial10 = &main_uart8; - serial11 = &main_uart9; - mmc0 = &main_sdhci0; - mmc1 = &main_sdhci1; - can0 = &main_mcan16; - can1 = &mcu_mcan0; - can2 = &mcu_mcan1; - can3 = &main_mcan3; - can4 = &main_mcan5; - }; - chosen { }; cpus { diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index e8bd0af0141c..6ebdc0f834a7 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -109,6 +109,13 @@ .endm /* + * Clear Branch History instruction + */ + .macro clearbhb + hint #22 + .endm + +/* * Speculation barrier */ .macro sb @@ -850,4 +857,50 @@ alternative_endif #endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */ + .macro __mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_loop_iter + mov \tmp, #32 // Patched to correct the immediate +alternative_cb_end +.Lspectre_bhb_loop\@: + b . + 4 + subs \tmp, \tmp, #1 + b.ne .Lspectre_bhb_loop\@ + sb +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + .macro mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_loop_mitigation_enable + b .L_spectre_bhb_loop_done\@ // Patched to NOP +alternative_cb_end + __mitigate_spectre_bhb_loop \tmp +.L_spectre_bhb_loop_done\@: +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + /* Save/restores x0-x3 to the stack */ + .macro __mitigate_spectre_bhb_fw +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! 
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 +alternative_cb smccc_patch_fw_mitigation_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + .macro mitigate_spectre_bhb_clear_insn +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_clearbhb + /* Patched to NOP when not supported */ + clearbhb + isb +alternative_cb_end +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ef6be92b1921..a77b5f49b3a6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -637,6 +637,35 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } + +static inline bool supports_csv2p3(int scope) +{ + u64 pfr0; + u8 csv2_val; + + if (scope == SCOPE_LOCAL_CPU) + pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); + else + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + csv2_val = cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_CSV2_SHIFT); + return csv2_val == 3; +} + +static inline bool supports_clearbhb(int scope) +{ + u64 isar2; + + if (scope == SCOPE_LOCAL_CPU) + isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1); + else + isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); + + return cpuid_feature_extract_unsigned_field(isar2, + ID_AA64ISAR2_CLEARBHB_SHIFT); +} + const struct cpumask *system_32bit_el0_cpumask(void); DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 999b9149f856..bfbf0c4c7c5e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,10 +73,14 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_NEOVERSE_V1 0xD40 +#define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 +#define ARM_CPU_PART_CORTEX_A78C 0xD4B #define APM_CPU_PART_POTENZA 0x000 @@ -117,10 +121,14 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) +#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git 
a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 3198acb2aad8..7f3c87f7a0ce 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -106,7 +106,7 @@ msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 1f // and check that it sticks + tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 4335800201c9..daff882883f9 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,9 +62,11 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_TEXT3, + FIX_ENTRY_TRAMP_TEXT2, + FIX_ENTRY_TRAMP_TEXT1, FIX_ENTRY_TRAMP_DATA, - FIX_ENTRY_TRAMP_TEXT, -#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 6b776c8667b2..b02f0c328c8e 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -65,6 +65,7 @@ enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_PSB = 0x11 << 5, AARCH64_INSN_HINT_TSB = 0x12 << 5, AARCH64_INSN_HINT_CSDB = 0x14 << 5, + AARCH64_INSN_HINT_CLEARBHB = 0x16 << 5, AARCH64_INSN_HINT_BTI = 0x20 << 5, AARCH64_INSN_HINT_BTIC = 0x22 << 5, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5bc01e62c08a..031e3a2537fc 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -714,6 +714,11 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } +static inline bool kvm_system_needs_idmapped_vectors(void) +{ + return cpus_have_const_cap(ARM64_SPECTRE_V3A); +} + void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); static inline void kvm_arch_hardware_unsetup(void) {} diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index b77e9b3f5371..43f8c25b3fda 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -39,28 +39,4 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ bti c ; -/* - * Annotate a function as position independent, i.e., safe to be called before - * the kernel virtual mapping is activated. 
- */ -#define SYM_FUNC_START_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START(x) - -#define SYM_FUNC_START_WEAK_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START_WEAK(x) - -#define SYM_FUNC_START_WEAK_ALIAS_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN) - -#define SYM_FUNC_END_PI(x) \ - SYM_FUNC_END(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - -#define SYM_FUNC_END_ALIAS_PI(x) \ - SYM_FUNC_END_ALIAS(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - #endif diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index a11ccadd47d2..094701ec5500 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,8 +1,8 @@ SECTIONS { #ifdef CONFIG_ARM64_MODULE_PLTS - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } #endif #ifdef CONFIG_KASAN_SW_TAGS diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index e4704a403237..a857bcacf0fe 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -5,6 +5,7 @@ #ifndef __ASM_MTE_KASAN_H #define __ASM_MTE_KASAN_H +#include <asm/compiler.h> #include <asm/mte-def.h> #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 7032f04c8ac6..b1e1b74d993c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -92,7 +92,7 @@ extern bool arm64_use_ng_mappings; #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */ #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -101,7 +101,7 @@ extern bool arm64_use_ng_mappings; #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC /* PAGE_EXECONLY if Enhanced PAN */ #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c4ba047a82d2..94e147e5456c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1017,17 +1017,6 @@ static inline bool arch_wants_old_prefaulted_pte(void) } #define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte -static inline pgprot_t arch_filter_pgprot(pgprot_t prot) -{ - if (cpus_have_const_cap(ARM64_HAS_EPAN)) - return prot; - - if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY)) - return prot; - - return PAGE_READONLY_EXEC; -} - static inline bool pud_sect_supported(void) { return PAGE_SIZE == SZ_4K; diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index 1bce62fa908a..56f7b1d4d54b 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -5,7 +5,7 @@ #ifndef __ASM_RWONCE_H #define __ASM_RWONCE_H -#ifdef CONFIG_LTO +#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__) #include <linux/compiler_types.h> #include <asm/alternative-macros.h> @@ -66,7 +66,7 @@ }) #endif /* !BUILD_VDSO */ -#endif /* CONFIG_LTO */ +#endif /* CONFIG_LTO && !__ASSEMBLY__ */ #include <asm-generic/rwonce.h> diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 
152cb35bf9df..40971ac1303f 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -23,4 +23,9 @@ extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[]; +static inline size_t entry_tramp_text_size(void) +{ + return __entry_tramp_text_end - __entry_tramp_text_start; +} + #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index f62ca39da6c5..86e0cc9b9c68 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -93,5 +93,9 @@ void spectre_v4_enable_task_mitigation(struct task_struct *tsk); enum mitigation_state arm64_get_meltdown_state(void); +enum mitigation_state arm64_get_spectre_bhb_state(void); +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +u8 spectre_bhb_loop_affected(int scope); +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004ae..932d45b17877 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -773,6 +773,7 @@ #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64isar2 */ +#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 #define ID_AA64ISAR2_RPRES_SHIFT 4 #define ID_AA64ISAR2_WFXT_SHIFT 0 @@ -904,6 +905,7 @@ #endif /* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ECBHB_SHIFT 60 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 #define ID_AA64MMFR1_TWED_SHIFT 32 diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h new file mode 100644 index 000000000000..f64613a96d53 --- /dev/null +++ b/arch/arm64/include/asm/vectors.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ +#ifndef __ASM_VECTORS_H +#define __ASM_VECTORS_H + +#include <linux/bug.h> +#include <linux/percpu.h> + +#include <asm/fixmap.h> + +extern char vectors[]; +extern char tramp_vectors[]; +extern char __bp_harden_el1_vectors[]; + +/* + * Note: the order of this enum corresponds to two arrays in entry.S: + * tramp_vecs and __bp_harden_el1_vectors. By default the canonical + * 'full fat' vectors are used directly. + */ +enum arm64_bp_harden_el1_vectors { +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + /* + * Perform the BHB loop mitigation, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_LOOP, + + /* + * Make the SMC call for firmware mitigation, before branching to the + * canonical vectors. + */ + EL1_VECTOR_BHB_FW, + + /* + * Use the ClearBHB instruction, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_CLEAR_INSN, +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + + /* + * Remap the kernel before branching to the canonical vectors. + */ + EL1_VECTOR_KPTI, +}; + +#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +#define EL1_VECTOR_BHB_LOOP -1 +#define EL1_VECTOR_BHB_FW -1 +#define EL1_VECTOR_BHB_CLEAR_INSN -1 +#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + +/* The vectors to use on return from EL0. e.g. 
to remap the kernel */ +DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); + +#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_VALIAS 0 +#endif + +static inline const char * +arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot) +{ + if (arm64_kernel_unmapped_at_el0()) + return (char *)TRAMP_VALIAS + SZ_2K * slot; + + WARN_ON_ONCE(slot == EL1_VECTOR_KPTI); + + return __bp_harden_el1_vectors + SZ_2K * slot; +} + +#endif /* __ASM_VECTORS_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..323e251ed37b 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 066098198c24..a401180e8d66 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -502,6 +502,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_spectre_v4, .cpu_enable = spectre_v4_enable_mitigation, }, + { + .desc = "Spectre-BHB", + .capability = ARM64_SPECTRE_BHB, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = is_spectre_bhb_affected, + .cpu_enable = spectre_bhb_enable_mitigation, + }, #ifdef CONFIG_ARM64_ERRATUM_1418040 { .desc = "ARM erratum 1418040", @@ -600,6 +607,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2077057 + { + .desc = "ARM erratum 2077057", + .capability = ARM64_WORKAROUND_2077057, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2), + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_2064142 { .desc = "ARM erratum 2064142", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8d..d33687673f6b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -73,6 +73,8 @@ #include <linux/mm.h> #include <linux/cpu.h> #include <linux/kasan.h> +#include <linux/percpu.h> + #include <asm/cpu.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> @@ -85,6 +87,7 @@ #include <asm/smp.h> #include <asm/sysreg.h> #include <asm/traps.h> +#include <asm/vectors.h> #include <asm/virt.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ @@ -110,6 +113,8 @@ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); +DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; + /* * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs * support it? 
@@ -226,6 +231,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1590,6 +1596,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) int cpu = smp_processor_id(); + if (__this_cpu_read(this_cpu_vector) == vectors) { + const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); + + __this_cpu_write(this_cpu_vector, v); + } + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 772ec2ecf488..4a3a653df07e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -37,18 +37,21 @@ .macro kernel_ventry, el:req, ht:req, regsize:req, label:req .align 7 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +.Lventry_start\@: .if \el == 0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 + /* + * This must be the first instruction of the EL0 vector entries. It is + * skipped by the trampoline vectors, to trigger the cleanup. + */ + b .Lskip_tramp_vectors_cleanup\@ .if \regsize == 64 mrs x30, tpidrro_el0 msr tpidrro_el0, xzr .else mov x30, xzr .endif -alternative_else_nop_endif +.Lskip_tramp_vectors_cleanup\@: .endif -#endif sub sp, sp, #PT_REGS_SIZE #ifdef CONFIG_VMAP_STACK @@ -95,11 +98,15 @@ alternative_else_nop_endif mrs x0, tpidrro_el0 #endif b el\el\ht\()_\regsize\()_\label +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm - .macro tramp_alias, dst, sym + .macro tramp_alias, dst, sym, tmp mov_q \dst, TRAMP_VALIAS - add \dst, \dst, #(\sym - .entry.tramp.text) + adr_l \tmp, \sym + add \dst, \dst, \tmp + adr_l \tmp, .entry.tramp.text + sub \dst, \dst, \tmp .endm /* @@ -116,7 +123,7 @@ alternative_cb_end tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state -alternative_cb spectre_v4_patch_fw_mitigation_conduit +alternative_cb smccc_patch_fw_mitigation_conduit nop // Patched to SMC/HVC #0 alternative_cb_end .L__asm_ssbd_skip\@: @@ -413,21 +420,26 @@ alternative_else_nop_endif ldp x24, x25, [sp, #16 * 12] ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] - add sp, sp, #PT_REGS_SIZE // restore sp .if \el == 0 -alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 + ldr lr, [sp, #S_LR] + add sp, sp, #PT_REGS_SIZE // restore sp + eret +alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 4f - msr far_el1, x30 - tramp_alias x30, tramp_exit_native + msr far_el1, x29 + tramp_alias x30, tramp_exit_native, x29 br x30 4: - tramp_alias x30, tramp_exit_compat + tramp_alias x30, tramp_exit_compat, x29 br x30 #endif .else + ldr lr, [sp, #S_LR] + add sp, sp, #PT_REGS_SIZE // restore sp + /* Ensure any device/NC reads complete */ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 @@ -594,12 +606,6 @@ SYM_CODE_END(ret_to_user) .popsection // .entry.text -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * Exception vectors trampoline. 
- */ - .pushsection ".entry.tramp.text", "ax" - // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 @@ -633,12 +639,47 @@ alternative_else_nop_endif */ .endm - .macro tramp_ventry, regsize = 64 + .macro tramp_data_page dst + adr_l \dst, .entry.tramp.text + sub \dst, \dst, PAGE_SIZE + .endm + + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RANDOMIZE_BASE + tramp_data_page \dst + add \dst, \dst, #:lo12:__entry_tramp_data_\var + ldr \dst, [\dst] +#else + ldr \dst, =\var +#endif + .endm + +#define BHB_MITIGATION_NONE 0 +#define BHB_MITIGATION_LOOP 1 +#define BHB_MITIGATION_FW 2 +#define BHB_MITIGATION_INSN 3 + + .macro tramp_ventry, vector_start, regsize, kpti, bhb .align 7 1: .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + + .if \bhb == BHB_MITIGATION_LOOP + /* + * This sequence must appear before the first indirect branch. i.e. the + * ret out of tramp_ventry. It appears here because x30 is free. + */ + __mitigate_spectre_bhb_loop x30 + .endif // \bhb == BHB_MITIGATION_LOOP + + .if \bhb == BHB_MITIGATION_INSN + clearbhb + isb + .endif // \bhb == BHB_MITIGATION_INSN + + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy * entry onto the return stack and using a RET instruction to @@ -648,46 +689,75 @@ alternative_else_nop_endif b . 2: tramp_map_kernel x30 -#ifdef CONFIG_RANDOMIZE_BASE - adr x30, tramp_vectors + PAGE_SIZE alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 - ldr x30, [x30] -#else - ldr x30, =vectors -#endif + tramp_data_read_var x30, vectors alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM - prfm plil1strm, [x30, #(1b - tramp_vectors)] + prfm plil1strm, [x30, #(1b - \vector_start)] alternative_else_nop_endif + msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors) isb + .else + ldr x30, =vectors + .endif // \kpti == 1 + + .if \bhb == BHB_MITIGATION_FW + /* + * The firmware sequence must appear before the first indirect branch. + * i.e. the ret out of tramp_ventry. But it also needs the stack to be + * mapped to save/restore the registers the SMC clobbers. + */ + __mitigate_spectre_bhb_fw + .endif // \bhb == BHB_MITIGATION_FW + + add x30, x30, #(1b - \vector_start + 4) ret +.org 1b + 128 // Did we overflow the ventry slot? .endm .macro tramp_exit, regsize = 64 - adr x30, tramp_vectors + tramp_data_read_var x30, this_cpu_vector + get_this_cpu_offset x29 + ldr x30, [x30, x29] + msr vbar_el1, x30 - tramp_unmap_kernel x30 + ldr lr, [sp, #S_LR] + tramp_unmap_kernel x29 .if \regsize == 64 - mrs x30, far_el1 + mrs x29, far_el1 .endif + add sp, sp, #PT_REGS_SIZE // restore sp eret sb .endm - .align 11 -SYM_CODE_START_NOALIGN(tramp_vectors) + .macro generate_tramp_vector, kpti, bhb +.Lvector_start\@: .space 0x400 - tramp_ventry - tramp_ventry - tramp_ventry - tramp_ventry + .rept 4 + tramp_ventry .Lvector_start\@, 64, \kpti, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, \kpti, \bhb + .endr + .endm - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. + * The order must match __bp_harden_el1_vectors and the + * arm64_bp_harden_el1_vectors enum. 
+ */ + .pushsection ".entry.tramp.text", "ax" + .align 11 +SYM_CODE_START_NOALIGN(tramp_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE SYM_CODE_END(tramp_vectors) SYM_CODE_START(tramp_exit_native) @@ -704,13 +774,57 @@ SYM_CODE_END(tramp_exit_compat) .pushsection ".rodata", "a" .align PAGE_SHIFT SYM_DATA_START(__entry_tramp_data_start) +__entry_tramp_data_vectors: .quad vectors +#ifdef CONFIG_ARM_SDE_INTERFACE +__entry_tramp_data___sdei_asm_handler: + .quad __sdei_asm_handler +#endif /* CONFIG_ARM_SDE_INTERFACE */ +__entry_tramp_data_this_cpu_vector: + .quad this_cpu_vector SYM_DATA_END(__entry_tramp_data_start) .popsection // .rodata #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* + * Exception vectors for spectre mitigations on entry from EL1 when + * kpti is not in use. + */ + .macro generate_el1_vector, bhb +.Lvector_start\@: + kernel_ventry 1, t, 64, sync // Synchronous EL1t + kernel_ventry 1, t, 64, irq // IRQ EL1t + kernel_ventry 1, t, 64, fiq // FIQ EL1h + kernel_ventry 1, t, 64, error // Error EL1t + + kernel_ventry 1, h, 64, sync // Synchronous EL1h + kernel_ventry 1, h, 64, irq // IRQ EL1h + kernel_ventry 1, h, 64, fiq // FIQ EL1h + kernel_ventry 1, h, 64, error // Error EL1h + + .rept 4 + tramp_ventry .Lvector_start\@, 64, 0, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, 0, \bhb + .endr + .endm + +/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */ + .pushsection ".entry.text", "ax" + .align 11 +SYM_CODE_START(__bp_harden_el1_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_el1_vector bhb=BHB_MITIGATION_LOOP + generate_el1_vector bhb=BHB_MITIGATION_FW + generate_el1_vector bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ +SYM_CODE_END(__bp_harden_el1_vectors) + .popsection + + +/* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: * x0 = previous task_struct (must be preserved across the switch) @@ -835,14 +949,7 @@ SYM_CODE_START(__sdei_asm_entry_trampoline) * Remember whether to unmap the kernel on exit. 
*/ 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)] - -#ifdef CONFIG_RANDOMIZE_BASE - adr x4, tramp_vectors + PAGE_SIZE - add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler - ldr x4, [x4] -#else - ldr x4, =__sdei_asm_handler -#endif + tramp_data_read_var x4, __sdei_asm_handler br x4 SYM_CODE_END(__sdei_asm_entry_trampoline) NOKPROBE(__sdei_asm_entry_trampoline) @@ -865,13 +972,6 @@ SYM_CODE_END(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE -.pushsection ".rodata", "a" -SYM_DATA_START(__sdei_asm_trampoline_next_handler) - .quad __sdei_asm_handler -SYM_DATA_END(__sdei_asm_trampoline_next_handler) -.popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -981,7 +1081,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 br x5 #endif SYM_CODE_END(__sdei_asm_handler) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 7eaf1f7c4168..55a1ced8eb77 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -66,6 +66,10 @@ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); KVM_NVHE_ALIAS(kvm_get_kimage_voffset); KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0); +KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter); +KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable); +KVM_NVHE_ALIAS(spectre_bhb_patch_wa3); +KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 902e4084c477..6d45c63c6454 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -18,15 +18,18 @@ */ #include <linux/arm-smccc.h> +#include <linux/bpf.h> #include <linux/cpu.h> #include <linux/device.h> #include <linux/nospec.h> #include <linux/prctl.h> #include <linux/sched/task_stack.h> +#include <asm/debug-monitors.h> #include <asm/insn.h> #include <asm/spectre.h> #include <asm/traps.h> +#include <asm/vectors.h> #include <asm/virt.h> /* @@ -96,14 +99,51 @@ static bool spectre_v2_mitigations_off(void) return ret; } +static const char *get_bhb_affected_string(enum mitigation_state bhb_state) +{ + switch (bhb_state) { + case SPECTRE_UNAFFECTED: + return ""; + default: + case SPECTRE_VULNERABLE: + return ", but not BHB"; + case SPECTRE_MITIGATED: + return ", BHB"; + } +} + +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false; +#endif +} + ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + enum mitigation_state bhb_state = arm64_get_spectre_bhb_state(); + const char *bhb_str = get_bhb_affected_string(bhb_state); + const char *v2_str = "Branch predictor hardening"; + switch (spectre_v2_state) { case SPECTRE_UNAFFECTED: - return sprintf(buf, "Not affected\n"); + if (bhb_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "Not affected\n"); + + /* + * Platforms affected by Spectre-BHB can't report + * "Not affected" for Spectre-v2. 
+ */ + v2_str = "CSV2"; + fallthrough; case SPECTRE_MITIGATED: - return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + if (bhb_state == SPECTRE_MITIGATED && _unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str); case SPECTRE_VULNERABLE: fallthrough; default: @@ -554,9 +594,9 @@ void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction * to call into firmware to adjust the mitigation state. */ -void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt, - __le32 *origptr, - __le32 *updptr, int nr_inst) +void __init smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) { u32 insn; @@ -770,3 +810,344 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } + +/* + * Spectre BHB. + * + * A CPU is either: + * - Mitigated by a branchy loop a CPU specific number of times, and listed + * in our "loop mitigated list". + * - Mitigated in software by the firmware Spectre v2 call. + * - Has the ClearBHB instruction to perform the mitigation. + * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no + * software mitigation in the vectors is needed. + * - Has CSV2.3, so is unaffected. + */ +static enum mitigation_state spectre_bhb_state; + +enum mitigation_state arm64_get_spectre_bhb_state(void) +{ + return spectre_bhb_state; +} + +enum bhb_mitigation_bits { + BHB_LOOP, + BHB_FW, + BHB_HW, + BHB_INSN, +}; +static unsigned long system_bhb_mitigations; + +/* + * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any + * SCOPE_SYSTEM call will give the right answer. 
+ */ +u8 spectre_bhb_loop_affected(int scope) +{ + u8 k = 0; + static u8 max_bhb_k; + + if (scope == SCOPE_LOCAL_CPU) { + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; + + max_bhb_k = max(max_bhb_k, k); + } else { + k = max_bhb_k; + } + + return k; +} + +static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +static bool is_spectre_bhb_fw_affected(int scope) +{ + static bool system_affected; + enum mitigation_state fw_state; + bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE; + static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + {}, + }; + bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), + spectre_bhb_firmware_mitigated_list); + + if (scope != SCOPE_LOCAL_CPU) + return system_affected; + + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { + system_affected = true; + return true; + } + + return false; +} + +static bool supports_ecbhb(int scope) +{ + u64 mmfr1; + + if (scope == SCOPE_LOCAL_CPU) + mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + else + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_ECBHB_SHIFT); +} + +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (supports_csv2p3(scope)) + return false; + + if (supports_clearbhb(scope)) + return true; + + if (spectre_bhb_loop_affected(scope)) + return true; + + if (is_spectre_bhb_fw_affected(scope)) + return true; + + return false; +} + +static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) +{ + const char *v = arm64_get_bp_hardening_vector(slot); + + if (slot < 0) + return; + + __this_cpu_write(this_cpu_vector, v); + + /* + * When KPTI is in use, the vectors are switched when exiting to + * user-space. 
+ */ + if (arm64_kernel_unmapped_at_el0()) + return; + + write_sysreg(v, vbar_el1); + isb(); +} + +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) +{ + bp_hardening_cb_t cpu_cb; + enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + + if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) + return; + + if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { + /* No point mitigating Spectre-BHB alone. */ + } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { + pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); + } else if (cpu_mitigations_off()) { + pr_info_once("spectre-bhb mitigation disabled by command line option\n"); + } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { + state = SPECTRE_MITIGATED; + set_bit(BHB_HW, &system_bhb_mitigations); + } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) { + /* + * Ensure KVM uses the indirect vector which will have ClearBHB + * added. + */ + if (!data->slot) + data->slot = HYP_VECTOR_INDIRECT; + + this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); + state = SPECTRE_MITIGATED; + set_bit(BHB_INSN, &system_bhb_mitigations); + } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + /* + * Ensure KVM uses the indirect vector which will have the + * branchy-loop added. A57/A72-r0 will already have selected + * the spectre-indirect vector, which is sufficient for BHB + * too. + */ + if (!data->slot) + data->slot = HYP_VECTOR_INDIRECT; + + this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); + state = SPECTRE_MITIGATED; + set_bit(BHB_LOOP, &system_bhb_mitigations); + } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (fw_state == SPECTRE_MITIGATED) { + /* + * Ensure KVM uses one of the spectre bp_hardening + * vectors. The indirect vector doesn't include the EL3 + * call, so needs upgrading to + * HYP_VECTOR_SPECTRE_INDIRECT. + */ + if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) + data->slot += 1; + + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + + /* + * The WA3 call in the vectors supersedes the WA1 call + * made during context-switch. Uninstall any firmware + * bp_hardening callback. 
+ */ + cpu_cb = spectre_v2_get_sw_mitigation_cb(); + if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) + __this_cpu_write(bp_hardening_data.fn, NULL); + + state = SPECTRE_MITIGATED; + set_bit(BHB_FW, &system_bhb_mitigations); + } + } + + update_mitigation_state(&spectre_bhb_state, state); +} + +/* Patched to NOP when enabled */ +void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); + + if (test_bit(BHB_LOOP, &system_bhb_mitigations)) + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +/* Patched to NOP when enabled */ +void noinstr spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); + + if (test_bit(BHB_FW, &system_bhb_mitigations)) + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +/* Patched to correct the immediate */ +void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); +} + +/* Patched to mov WA3 when supported */ +void noinstr spectre_bhb_patch_wa3(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + !test_bit(BHB_FW, &system_bhb_mitigations)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + + insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_ORR, + AARCH64_INSN_VARIANT_32BIT, + AARCH64_INSN_REG_ZR, rd, + ARM_SMCCC_ARCH_WORKAROUND_3); + if (WARN_ON_ONCE(insn == AARCH64_BREAK_FAULT)) + return; + + *updptr++ = cpu_to_le32(insn); +} + +/* Patched to NOP when not supported */ +void __init spectre_bhb_patch_clearbhb(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 2); + + if (test_bit(BHB_INSN, &system_bhb_mitigations)) + return; + + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +#ifdef CONFIG_BPF_SYSCALL +#define EBPF_WARN "Unprivileged eBPF is enabled, data leaks possible via Spectre v2 BHB attacks!\n" +void unpriv_ebpf_notify(int new_state) +{ + if (spectre_v2_state == SPECTRE_VULNERABLE || + spectre_bhb_state != SPECTRE_MITIGATED) + return; + + if (!new_state) + pr_err("WARNING: %s", EBPF_WARN); +} +#endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 50bab186c49b..edaf0faf766f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -341,7 +341,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE, "Entry trampoline text too big") #endif #ifdef CONFIG_KVM diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 
a4a0063df456..4dca6ffd03d4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -797,6 +797,24 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) xfer_to_guest_mode_work_pending(); } +/* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. + * + * This must be noinstr as instrumentation may make use of RCU, and this is not + * safe during the EQS. + */ +static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + int ret; + + guest_state_enter_irqoff(); + ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); + guest_state_exit_irqoff(); + + return ret; +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -881,9 +899,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * Enter the guest */ trace_kvm_entry(*vcpu_pc(vcpu)); - guest_enter_irqoff(); + guest_timing_enter_irqoff(); - ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); + ret = kvm_arm_vcpu_enter_exit(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -918,26 +936,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_arch_vcpu_ctxsync_fp(vcpu); /* - * We may have taken a host interrupt in HYP mode (ie - * while executing the guest). This interrupt is still - * pending, as we haven't serviced it yet! + * We must ensure that any pending interrupts are taken before + * we exit guest timing so that timer ticks are accounted as + * guest time. Transiently unmask interrupts so that any + * pending interrupts are taken. * - * We're now back in SVC mode, with interrupts - * disabled. Enabling the interrupts now will have - * the effect of taking the interrupt again, in SVC - * mode this time. + * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other + * context synchronization event) is necessary to ensure that + * pending interrupts are taken. */ local_irq_enable(); + isb(); + local_irq_disable(); + + guest_timing_exit_irqoff(); + + local_irq_enable(); - /* - * We do local_irq_enable() before calling guest_exit() so - * that if a timer interrupt hits while running the guest we - * account that tick as being spent in the guest. We enable - * preemption after calling guest_exit() so that if we get - * preempted we make sure ticks after that is not counted as - * guest time. - */ - guest_exit(); trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* Exit types that need handling before we can be preempted */ @@ -1476,10 +1491,7 @@ static int kvm_init_vector_slots(void) base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) - return 0; - - if (!has_vhe()) { + if (kvm_system_needs_idmapped_vectors() && !has_vhe()) { err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), __BP_HARDEN_HYP_VECS_SZ, &base); if (err) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fd2dd26caf91..e3140abd2e2e 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -228,6 +228,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; + if (ARM_SERROR_PENDING(exception_index)) { + /* + * The SError is handled by handle_exit_early(). If the guest + * survives it will re-execute the original instruction. 
+ */ + return 1; + } + exception_index = ARM_EXCEPTION_CODE(exception_index); switch (exception_index) { diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index b6b6801d96d5..7839d075729b 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -62,6 +62,10 @@ el1_sync: // Guest trapped into EL2 /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ ARM_SMCCC_ARCH_WORKAROUND_2) + cbz w1, wa_epilogue + + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_3) cbnz w1, el1_trap wa_epilogue: @@ -192,7 +196,10 @@ SYM_CODE_END(__kvm_hyp_vector) sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] stp x0, x1, [sp, #(8 * 2)] + alternative_cb spectre_bhb_patch_wa3 + /* Patched to mov WA3 when supported */ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + alternative_cb_end smc #0 ldp x2, x3, [sp, #(8 * 0)] add sp, sp, #(8 * 2) @@ -205,6 +212,8 @@ SYM_CODE_END(__kvm_hyp_vector) spectrev2_smccc_wa1_smc .else stp x0, x1, [sp, #-16]! + mitigate_spectre_bhb_loop x0 + mitigate_spectre_bhb_clear_insn .endif .if \indirect != 0 alternative_cb kvm_patch_vector_branch diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 58e14f8ead23..701cfb964905 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -402,6 +402,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } +static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + /* + * Check for the conditions of Cortex-A510's #2077057. When these occur + * SPSR_EL2 can't be trusted, but isn't needed either as it is + * unchanged from the value in vcpu_gp_regs(vcpu)->pstate. + * Are we single-stepping the guest, and took a PAC exception from the + * active-not-pending state? + */ + if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) && + vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && + *vcpu_cpsr(vcpu) & DBG_SPSR_SS && + ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC) + write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); + + vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -413,7 +431,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) * Save PSTATE early so that we can evaluate the vcpu mode * early on. 
*/ - vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); + synchronize_vcpu_pstate(vcpu, exit_code); /* * Check whether we want to repaint the state one way or @@ -424,7 +442,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); - if (ARM_SERROR_PENDING(*exit_code)) { + if (ARM_SERROR_PENDING(*exit_code) && + ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) { u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); /* diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 958734f4d6b0..0c367eb5f4e2 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,8 @@ #include <asm/assembler.h> #include <asm/alternative.h> -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 526a7d6fa86f..cdbe8e246418 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -148,8 +148,10 @@ int hyp_map_vectors(void) phys_addr_t phys; void *bp_base; - if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) + if (!kvm_system_needs_idmapped_vectors()) { + __hyp_bp_vect_base = __bp_harden_hyp_vecs; return 0; + } phys = __hyp_pa(__bp_harden_hyp_vecs); bp_base = (void *)__pkvm_create_private_mapping(phys, diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 11d053fdd604..54af47005e45 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -10,6 +10,7 @@ #include <linux/kvm_host.h> #include <linux/types.h> #include <linux/jump_label.h> +#include <linux/percpu.h> #include <uapi/linux/psci.h> #include <kvm/arm_psci.h> @@ -24,6 +25,8 @@ #include <asm/fpsimd.h> #include <asm/debug-monitors.h> #include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/vectors.h> /* VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); @@ -67,7 +70,7 @@ NOKPROBE_SYMBOL(__activate_traps); static void __deactivate_traps(struct kvm_vcpu *vcpu) { - extern char vectors[]; /* kernel exception vectors */ + const char *host_vectors = vectors; ___deactivate_traps(vcpu); @@ -81,7 +84,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); + + if (!arm64_kernel_unmapped_at_el0()) + host_vectors = __this_cpu_read(this_cpu_vector); + write_sysreg(host_vectors, vbar_el1); } NOKPROBE_SYMBOL(__deactivate_traps); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 30da78f72b3b..202b8c455724 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -107,6 +107,18 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) break; } break; + case ARM_SMCCC_ARCH_WORKAROUND_3: + switch (arm64_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + break; + case SPECTRE_MITIGATED: + val[0] = SMCCC_RET_SUCCESS; + break; + case SPECTRE_UNAFFECTED: + val[0] = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED; + break; + } + break; case ARM_SMCCC_HV_PV_TIME_FEATURES: val[0] = SMCCC_RET_SUCCESS; break; diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 3eae32876897..5918095c90a5 
100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -46,8 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) * specification (ARM DEN 0022A). This means all suspend states * for KVM will preserve the register state. */ - kvm_vcpu_halt(vcpu); - kvm_clear_request(KVM_REQ_UNHALT, vcpu); + kvm_vcpu_wfi(vcpu); return PSCI_RET_SUCCESS; } @@ -406,7 +405,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { - return 3; /* PSCI version and two workaround registers */ + return 4; /* PSCI version and three workaround registers */ } int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) @@ -420,6 +419,9 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) return -EFAULT; + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++)) + return -EFAULT; + return 0; } @@ -459,6 +461,17 @@ static int get_kernel_wa_level(u64 regid) case SPECTRE_VULNERABLE: return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; } + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: + switch (arm64_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; + case SPECTRE_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL; + case SPECTRE_UNAFFECTED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; } return -EINVAL; @@ -475,6 +488,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) break; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; break; default: @@ -520,6 +534,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: if (val & ~KVM_REG_FEATURE_LEVEL_MASK) return -EINVAL; diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 7068da080799..49837d3a3ef5 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -248,6 +248,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, IRQCHIP_STATE_PENDING, &val); WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + } else if (vgic_irq_is_mapped_level(irq)) { + val = vgic_get_phys_line_level(irq); } else { val = irq_is_pending(irq); } diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 1fd5d790ab80..ebde40e7fa2b 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -SYM_FUNC_START_PI(clear_page) +SYM_FUNC_START(__pi_clear_page) mrs x1, dczid_el0 tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */ and w1, w1, #0xf @@ -35,5 +35,6 @@ SYM_FUNC_START_PI(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 2b ret -SYM_FUNC_END_PI(clear_page) +SYM_FUNC_END(__pi_clear_page) +SYM_FUNC_ALIAS(clear_page, __pi_clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 29144f4cd449..c336d2ffdec5 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -SYM_FUNC_START_PI(copy_page) +SYM_FUNC_START(__pi_copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. 
prfm pldl1strm, [x1, #128] @@ -75,5 +75,6 @@ alternative_else_nop_endif stnp x16, x17, [x0, #112 - 256] ret -SYM_FUNC_END_PI(copy_page) +SYM_FUNC_END(__pi_copy_page) +SYM_FUNC_ALIAS(copy_page, __pi_copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 7c2276fdab54..37a9f2a4f7f4 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -38,7 +38,7 @@ .p2align 4 nop -SYM_FUNC_START_WEAK_PI(memchr) +SYM_FUNC_START(__pi_memchr) and chrin, chrin, #0xff lsr wordcnt, cntin, #3 cbz wordcnt, L(byte_loop) @@ -71,5 +71,6 @@ CPU_LE( rev tmp, tmp) L(not_found): mov result, #0 ret -SYM_FUNC_END_PI(memchr) +SYM_FUNC_END(__pi_memchr) +SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 7d956384222f..a5ccf2c55f91 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -32,7 +32,7 @@ #define tmp1 x7 #define tmp2 x8 -SYM_FUNC_START_WEAK_PI(memcmp) +SYM_FUNC_START(__pi_memcmp) subs limit, limit, 8 b.lo L(less8) @@ -134,6 +134,6 @@ L(byte_loop): b.eq L(byte_loop) sub result, data1w, data2w ret - -SYM_FUNC_END_PI(memcmp) +SYM_FUNC_END(__pi_memcmp) +SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index b82fd64ee1e1..4ab48d49c451 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,10 +57,7 @@ The loop tail is handled by always copying 64 bytes from the end. */ -SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_WEAK_ALIAS_PI(memmove) -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK_PI(memcpy) +SYM_FUNC_START(__pi_memcpy) add srcend, src, count add dstend, dstin, count cmp count, 128 @@ -241,12 +238,16 @@ L(copy64_from_start): stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstin] ret +SYM_FUNC_END(__pi_memcpy) -SYM_FUNC_END_PI(memcpy) -EXPORT_SYMBOL(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) +SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_END_ALIAS_PI(memmove) -EXPORT_SYMBOL(memmove) -SYM_FUNC_END_ALIAS(__memmove) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + +SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy) + +SYM_FUNC_ALIAS(__memmove, __pi_memmove) EXPORT_SYMBOL(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) +EXPORT_SYMBOL(memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index a9c1c9a01ea9..a5aebe82ad73 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,8 +42,7 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 -SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_WEAK_PI(memset) +SYM_FUNC_START(__pi_memset) mov dst, dstin /* Preserve return value. 
*/ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -202,7 +201,10 @@ SYM_FUNC_START_WEAK_PI(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -SYM_FUNC_END_PI(memset) -EXPORT_SYMBOL(memset) -SYM_FUNC_END_ALIAS(__memset) +SYM_FUNC_END(__pi_memset) + +SYM_FUNC_ALIAS(__memset, __pi_memset) EXPORT_SYMBOL(__memset) + +SYM_FUNC_ALIAS_WEAK(memset, __pi_memset) +EXPORT_SYMBOL(memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index 1f47eae3b0d6..94ee67a6b212 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK(strchr) +SYM_FUNC_START(__pi_strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,7 @@ SYM_FUNC_START_WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -SYM_FUNC_END(strchr) +SYM_FUNC_END(__pi_strchr) + +SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 83bcad72ec97..cda7de747efc 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -41,7 +41,7 @@ /* Start of performance-critical section -- one 64B cache line. */ .align 6 -SYM_FUNC_START_WEAK_PI(strcmp) +SYM_FUNC_START(__pi_strcmp) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 @@ -171,6 +171,6 @@ L(loop_misaligned): L(done): sub result, data1, data2 ret - -SYM_FUNC_END_PI(strcmp) +SYM_FUNC_END(__pi_strcmp) +SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp) EXPORT_SYMBOL_NOHWKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 1648790e91b3..4919fe81ae54 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -79,7 +79,7 @@ whether the first fetch, which may be misaligned, crosses a page boundary. */ -SYM_FUNC_START_WEAK_PI(strlen) +SYM_FUNC_START(__pi_strlen) and tmp1, srcin, MIN_PAGE_SIZE - 1 mov zeroones, REP8_01 cmp tmp1, MIN_PAGE_SIZE - 16 @@ -208,6 +208,6 @@ L(page_cross): csel data1, data1, tmp4, eq csel data2, data2, tmp2, eq b L(page_cross_entry) - -SYM_FUNC_END_PI(strlen) +SYM_FUNC_END(__pi_strlen) +SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index e42bcfcd37e6..a848abcec975 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -44,7 +44,7 @@ #define endloop x15 #define count mask -SYM_FUNC_START_WEAK_PI(strncmp) +SYM_FUNC_START(__pi_strncmp) cbz limit, L(ret0) eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -256,6 +256,6 @@ L(done_loop): L(ret0): mov result, #0 ret - -SYM_FUNC_END_PI(strncmp) +SYM_FUNC_END(__pi_strncmp) +SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp) EXPORT_SYMBOL_NOHWKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index b72913a99038..d5ac0e10a01d 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -SYM_FUNC_START_WEAK_PI(strnlen) +SYM_FUNC_START(__pi_strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,7 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). 
*/ .Lhit_limit: mov len, limit ret -SYM_FUNC_END_PI(strnlen) +SYM_FUNC_END(__pi_strnlen) + +SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 13132d1ed6d1..a5123cf0ce12 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK_PI(strrchr) +SYM_FUNC_START(__pi_strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,6 @@ SYM_FUNC_START_WEAK_PI(strrchr) b 1b 2: mov x0, x3 ret -SYM_FUNC_END_PI(strrchr) +SYM_FUNC_END(__pi_strrchr) +SYM_FUNC_ALIAS_WEAK(strrchr, __pi_strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7d0563db4201..0ea6cc25dc66 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -107,10 +107,11 @@ SYM_FUNC_END(icache_inval_pou) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) /* * dcache_clean_pou(start, end) @@ -140,7 +141,7 @@ SYM_FUNC_END(dcache_clean_pou) * - start - kernel start address of region * - end - kernel end address of region */ -SYM_FUNC_START_PI(dcache_inval_poc) +SYM_FUNC_START(__pi_dcache_inval_poc) dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -158,7 +159,8 @@ SYM_FUNC_START_PI(dcache_inval_poc) b.lo 2b dsb sy ret -SYM_FUNC_END_PI(dcache_inval_poc) +SYM_FUNC_END(__pi_dcache_inval_poc) +SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) /* * dcache_clean_poc(start, end) @@ -169,10 +171,11 @@ SYM_FUNC_END_PI(dcache_inval_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_poc) +SYM_FUNC_START(__pi_dcache_clean_poc) dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_poc) +SYM_FUNC_END(__pi_dcache_clean_poc) +SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc) /* * dcache_clean_pop(start, end) @@ -183,13 +186,14 @@ SYM_FUNC_END_PI(dcache_clean_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_pop) +SYM_FUNC_START(__pi_dcache_clean_pop) alternative_if_not ARM64_HAS_DCPOP b dcache_clean_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_pop) +SYM_FUNC_END(__pi_dcache_clean_pop) +SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop) /* * __dma_flush_area(start, size) @@ -199,11 +203,12 @@ SYM_FUNC_END_PI(dcache_clean_pop) * - start - virtual start address of region * - size - size in question */ -SYM_FUNC_START_PI(__dma_flush_area) +SYM_FUNC_START(__pi___dma_flush_area) add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__dma_flush_area) +SYM_FUNC_END(__pi___dma_flush_area) +SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -211,12 +216,13 @@ SYM_FUNC_END_PI(__dma_flush_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_map_area) +SYM_FUNC_START(__pi___dma_map_area) add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __pi_dcache_inval_poc b __pi_dcache_clean_poc -SYM_FUNC_END_PI(__dma_map_area) 
+SYM_FUNC_END(__pi___dma_map_area) +SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -224,9 +230,10 @@ SYM_FUNC_END_PI(__dma_map_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_unmap_area) +SYM_FUNC_START(__pi___dma_unmap_area) add x1, x0, x1 cmp w2, #DMA_TO_DEVICE b.ne __pi_dcache_inval_poc ret -SYM_FUNC_END_PI(__dma_unmap_area) +SYM_FUNC_END(__pi___dma_unmap_area) +SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area) diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index a38f54cd638c..77ada00280d9 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -7,8 +7,10 @@ #include <linux/io.h> #include <linux/memblock.h> +#include <linux/mm.h> #include <linux/types.h> +#include <asm/cpufeature.h> #include <asm/page.h> /* @@ -38,3 +40,18 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) { return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK); } + +static int __init adjust_protection_map(void) +{ + /* + * With Enhanced PAN we can honour the execute-only permissions as + * there is no PAN override with such mappings. + */ + if (cpus_have_const_cap(ARM64_HAS_EPAN)) { + protection_map[VM_EXEC] = PAGE_EXECONLY; + protection_map[VM_EXEC | VM_SHARED] = PAGE_EXECONLY; + } + + return 0; +} +arch_initcall(adjust_protection_map); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index acfae9b41cc8..49abbf43bf35 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -617,6 +617,8 @@ early_param("rodata", parse_rodata); #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { + int i; + pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); @@ -625,11 +627,15 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, __pgd_pgtable_alloc, 0); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ - __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern char __entry_tramp_data_start[]; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index e7719e8f18de..cea7533cb304 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -44,6 +44,7 @@ MTE_ASYMM SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 +SPECTRE_BHB SSBS SVE UNMAP_KERNEL_AT_EL0 @@ -55,9 +56,10 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 -WORKAROUND_2064142 -WORKAROUND_2038923 WORKAROUND_1902691 +WORKAROUND_2038923 +WORKAROUND_2064142 +WORKAROUND_2077057 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 3e336b3dbb10..ab6e3dc0bc1d 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -83,6 +83,8 @@ label = "HDMI OUT"; type = "a"; + ddc-en-gpios = <&gpa 25 GPIO_ACTIVE_HIGH>; + port { hdmi_con: endpoint { remote-endpoint = <&dw_hdmi_out>; @@ -114,17 +116,6 @@ gpio = <&gpf 14 GPIO_ACTIVE_LOW>; enable-active-high; }; 
- - hdmi_power: fixedregulator@3 { - compatible = "regulator-fixed"; - - regulator-name = "hdmi_power"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - - gpio = <&gpa 25 0>; - enable-active-high; - }; }; &ext { @@ -576,8 +567,6 @@ pinctrl-names = "default"; pinctrl-0 = <&pins_hdmi_ddc>; - hdmi-5v-supply = <&hdmi_power>; - ports { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 0a515cde1c18..25860fba6218 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -74,7 +74,7 @@ #define EXC(inst_reg,addr,handler) \ 9: inst_reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index f979adfd4fc2..ef73ba1e0ec1 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -803,7 +803,7 @@ early_param("coherentio", setcoherentio); static int __init setnocoherentio(char *str) { - dma_default_coherent = true; + dma_default_coherent = false; pr_info("Software DMA cache coherency (command line)\n"); return 0; } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index d542fb7af3ba..1986d1309410 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -351,6 +351,9 @@ asmlinkage void start_secondary(void) cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; + set_cpu_sibling_map(cpu); + set_cpu_core_map(cpu); + cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); @@ -362,9 +365,6 @@ asmlinkage void start_secondary(void) /* The CPU is running and counters synchronised, now mark it online */ set_cpu_online(cpu, true); - set_cpu_sibling_map(cpu); - set_cpu_core_map(cpu); - calculate_cpu_foreign_map(); /* diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index e59cb6246f76..a25e0b73ee70 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -414,6 +414,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -ENOIOCTLCMD; } +/* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. + * + * This must be noinstr as instrumentation may make use of RCU, and this is not + * safe during the EQS. + */ +static int noinstr kvm_mips_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + int ret; + + guest_state_enter_irqoff(); + ret = kvm_mips_callbacks->vcpu_run(vcpu); + guest_state_exit_irqoff(); + + return ret; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { int r = -EINTR; @@ -434,7 +452,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) lose_fpu(1); local_irq_disable(); - guest_enter_irqoff(); + guest_timing_enter_irqoff(); trace_kvm_enter(vcpu); /* @@ -445,10 +463,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); - r = kvm_mips_callbacks->vcpu_run(vcpu); + r = kvm_mips_vcpu_enter_exit(vcpu); + + /* + * We must ensure that any pending interrupts are taken before + * we exit guest timing so that timer ticks are accounted as + * guest time. Transiently unmask interrupts so that any + * pending interrupts are taken. + * + * TODO: is there a barrier which ensures that pending interrupts are + * recognised? Currently this just hopes that the CPU takes any pending + * interrupts between the enable and disable. 
+ */ + local_irq_enable(); + local_irq_disable(); trace_kvm_out(vcpu); - guest_exit_irqoff(); + guest_timing_exit_irqoff(); local_irq_enable(); out: @@ -1168,7 +1199,7 @@ static void kvm_mips_set_c0_status(void) /* * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) */ -int kvm_mips_handle_exit(struct kvm_vcpu *vcpu) +static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; u32 cause = vcpu->arch.host_cp0_cause; @@ -1357,6 +1388,17 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu) return ret; } +int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu) +{ + int ret; + + guest_state_exit_irqoff(); + ret = __kvm_mips_handle_exit(vcpu); + guest_state_enter_irqoff(); + + return ret; +} + /* Enable FPU for guest and restore context */ void kvm_own_fpu(struct kvm_vcpu *vcpu) { diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 4adca5abbc72..c706f5890a05 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -458,8 +458,8 @@ void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) /** * _kvm_vz_save_htimer() - Switch to software emulation of guest timer. * @vcpu: Virtual CPU. - * @compare: Pointer to write compare value to. - * @cause: Pointer to write cause value to. + * @out_compare: Pointer to write compare value to. + * @out_cause: Pointer to write cause value to. * * Save VZ guest timer state and switch to software emulation of guest CP0 * timer. The hard timer must already be in use, so preemption should be @@ -1541,11 +1541,14 @@ static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu) } /** - * kvm_trap_vz_handle_cop_unusuable() - Guest used unusable coprocessor. + * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor. * @vcpu: Virtual CPU context. * * Handle when the guest attempts to use a coprocessor which hasn't been allowed * by the root context. + * + * Return: value indicating whether to resume the host or the guest + * (RESUME_HOST or RESUME_GUEST) */ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) { @@ -1592,6 +1595,9 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) * * Handle when the guest attempts to use MSA when it is disabled in the root * context. 
+ * + * Return: value indicating whether to resume the host or the guest + * (RESUME_HOST or RESUME_GUEST) */ static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu) { diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c index d6efffd4dd20..fb0565bc34fd 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c @@ -22,7 +22,9 @@ #include "common.h" -static void *detect_magic __initdata = detect_memory_region; +#define MT7621_MEM_TEST_PATTERN 0xaa5555aa + +static u32 detect_magic __initdata; int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { @@ -58,24 +60,32 @@ phys_addr_t mips_cpc_default_phys_base(void) panic("Cannot detect cpc address"); } +static bool __init mt7621_addr_wraparound_test(phys_addr_t size) +{ + void *dm = (void *)KSEG1ADDR(&detect_magic); + + if (CPHYSADDR(dm + size) >= MT7621_LOWMEM_MAX_SIZE) + return true; + __raw_writel(MT7621_MEM_TEST_PATTERN, dm); + if (__raw_readl(dm) != __raw_readl(dm + size)) + return false; + __raw_writel(~MT7621_MEM_TEST_PATTERN, dm); + return __raw_readl(dm) == __raw_readl(dm + size); +} + static void __init mt7621_memory_detect(void) { - void *dm = &detect_magic; phys_addr_t size; - for (size = 32 * SZ_1M; size < 256 * SZ_1M; size <<= 1) { - if (!__builtin_memcmp(dm, dm + size, sizeof(detect_magic))) - break; + for (size = 32 * SZ_1M; size <= 256 * SZ_1M; size <<= 1) { + if (mt7621_addr_wraparound_test(size)) { + memblock_add(MT7621_LOWMEM_BASE, size); + return; + } } - if ((size == 256 * SZ_1M) && - (CPHYSADDR(dm + size) < MT7621_LOWMEM_MAX_SIZE) && - __builtin_memcmp(dm, dm + size, sizeof(detect_magic))) { - memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE); - memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE); - } else { - memblock_add(MT7621_LOWMEM_BASE, size); - } + memblock_add(MT7621_LOWMEM_BASE, MT7621_LOWMEM_MAX_SIZE); + memblock_add(MT7621_HIGHMEM_BASE, MT7621_HIGHMEM_SIZE); } void __init ralink_of_remap(void) diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index 0ec9cfc5131f..56ffd260c669 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -12,6 +12,14 @@ #include <asm/barrier.h> #include <linux/atomic.h> +/* compiler build environment sanity checks: */ +#if !defined(CONFIG_64BIT) && defined(__LP64__) +#error "Please use 'ARCH=parisc' to build the 32-bit kernel." +#endif +#if defined(CONFIG_64BIT) && !defined(__LP64__) +#error "Please use 'ARCH=parisc64' to build the 64-bit kernel." +#endif + /* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion * on use of volatile and __*_bit() (set/clear/change): * *_bit() want use of volatile. 
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ebf8a845b017..123d5f16cd9d 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -89,8 +89,8 @@ struct exception_table_entry { __asm__("1: " ldx " 0(" sr "%2),%0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__gu_val), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err)); \ + : "=r"(__gu_val), "+r"(__gu_err) \ + : "r"(ptr)); \ \ (val) = (__force __typeof__(*(ptr))) __gu_val; \ } @@ -123,8 +123,8 @@ struct exception_table_entry { "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=&r"(__gu_tmp.l), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err)); \ + : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ + : "r"(ptr)); \ \ (val) = __gu_tmp.t; \ } @@ -135,13 +135,12 @@ struct exception_table_entry { #define __put_user_internal(sr, x, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__pu_err); \ - __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \ \ switch (sizeof(*(ptr))) { \ - case 1: __put_user_asm(sr, "stb", __x, ptr); break; \ - case 2: __put_user_asm(sr, "sth", __x, ptr); break; \ - case 4: __put_user_asm(sr, "stw", __x, ptr); break; \ - case 8: STD_USER(sr, __x, ptr); break; \ + case 1: __put_user_asm(sr, "stb", x, ptr); break; \ + case 2: __put_user_asm(sr, "sth", x, ptr); break; \ + case 4: __put_user_asm(sr, "stw", x, ptr); break; \ + case 8: STD_USER(sr, x, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -150,7 +149,9 @@ struct exception_table_entry { #define __put_user(x, ptr) \ ({ \ - __put_user_internal("%%sr3,", x, ptr); \ + __typeof__(&*(ptr)) __ptr = ptr; \ + __typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x); \ + __put_user_internal("%%sr3,", __x, __ptr); \ }) #define __put_kernel_nofault(dst, src, type, err_label) \ @@ -180,8 +181,8 @@ struct exception_table_entry { "1: " stx " %2,0(" sr "%1)\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(x), "0"(__pu_err)) + : "+r"(__pu_err) \ + : "r"(ptr), "r"(x)) #if !defined(CONFIG_64BIT) @@ -193,8 +194,8 @@ struct exception_table_entry { "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(__val), "0"(__pu_err)); \ + : "+r"(__pu_err) \ + : "r"(ptr), "r"(__val)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 237d20dd5622..286cec4d86d7 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); - return 0; + return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { @@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) __asm__ __volatile__ ( " mtsp %4, %%sr1\n" " zdep %2, 29, 2, %%r19\n" -" dep %%r0, 31, 2, %2\n" +" dep %%r0, 31, 2, %3\n" " mtsar %%r19\n" " zvdepi -2, 32, %%r19\n" "1: ldw 0(%%sr1,%3),%%r20\n" @@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " andcm %%r21, %%r19, %%r21\n" " or %1, %%r20, %1\n" " or %2, %%r21, %2\n" -"3: stw %1,0(%%sr1,%1)\n" +"3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" " copy %%r0, %0\n" @@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs) ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. 
*/ break; } -#ifdef CONFIG_PA20 switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: @@ -607,22 +606,23 @@ void handle_unaligned(struct pt_regs *regs) flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; +#ifdef CONFIG_PA20 case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; case OPCODE_STD_L: ret = emulate_std(regs, R2(regs->iir),0); break; - } #endif + } switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: flop=1; - ret = emulate_ldw(regs, R2(regs->iir),0); + ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: - ret = emulate_ldw(regs, R2(regs->iir),1); + ret = emulate_ldw(regs, R2(regs->iir), 0); break; case OPCODE_FSTW_L: diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index 367f6397bda7..860385058085 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -346,6 +346,16 @@ u64 ioread64be(const void __iomem *addr) return *((u64 *)addr); } +u64 ioread64_lo_hi(const void __iomem *addr) +{ + u32 low, high; + + low = ioread32(addr); + high = ioread32(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} + u64 ioread64_hi_lo(const void __iomem *addr) { u32 low, high; @@ -419,6 +429,12 @@ void iowrite64be(u64 datum, void __iomem *addr) } } +void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32(val, addr); + iowrite32(val >> 32, addr + sizeof(u32)); +} + void iowrite64_hi_lo(u64 val, void __iomem *addr) { iowrite32(val >> 32, addr + sizeof(u32)); @@ -530,6 +546,7 @@ EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); EXPORT_SYMBOL(ioread64); EXPORT_SYMBOL(ioread64be); +EXPORT_SYMBOL(ioread64_lo_hi); EXPORT_SYMBOL(ioread64_hi_lo); EXPORT_SYMBOL(iowrite8); EXPORT_SYMBOL(iowrite16); @@ -538,6 +555,7 @@ EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); EXPORT_SYMBOL(iowrite64); EXPORT_SYMBOL(iowrite64be); +EXPORT_SYMBOL(iowrite64_lo_hi); EXPORT_SYMBOL(iowrite64_hi_lo); EXPORT_SYMBOL(ioread8_rep); EXPORT_SYMBOL(ioread16_rep); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 1ae31db9988f..1dc2e88e7b04 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -337,9 +337,9 @@ static void __init setup_bootmem(void) static bool kernel_set_to_readonly; -static void __init map_pages(unsigned long start_vaddr, - unsigned long start_paddr, unsigned long size, - pgprot_t pgprot, int force) +static void __ref map_pages(unsigned long start_vaddr, + unsigned long start_paddr, unsigned long size, + pgprot_t pgprot, int force) { pmd_t *pmd; pte_t *pg_table; @@ -449,7 +449,7 @@ void __init set_kernel_text_rw(int enable_read_write) flush_tlb_all(); } -void __ref free_initmem(void) +void free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; @@ -463,7 +463,6 @@ void __ref free_initmem(void) /* The init text pages are marked R-X. We have to * flush the icache and mark them RW- * - * This is tricky, because map_pages is in the init section. 
* Do a dummy remap of the data section first (the data * section is already PAGE_KERNEL) to pull in the TLB entries * for map_kernel */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index ba5b1becf518..006cbec70ffe 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -202,7 +202,6 @@ static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) /* * The current system page and segment sizes */ -extern int mmu_linear_psize; extern int mmu_virtual_psize; extern int mmu_vmalloc_psize; extern int mmu_io_psize; @@ -213,6 +212,7 @@ extern int mmu_io_psize; #define mmu_virtual_psize MMU_PAGE_4K #endif #endif +extern int mmu_linear_psize; extern int mmu_vmemmap_psize; /* MMU initialization */ diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index 7a90000f8d15..f83866a19e87 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -9,7 +9,7 @@ struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); int add_tce_mem_ranges(struct crash_mem **mem_ranges); int add_initrd_mem_range(struct crash_mem **mem_ranges); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU int add_htab_mem_range(struct crash_mem **mem_ranges); #else static inline int add_htab_mem_range(struct crash_mem **mem_ranges) diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4fe018cc207b..7b9dcd51af32 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -19,16 +19,6 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) regs_set_return_ip(regs, ip); } -#define klp_get_ftrace_location klp_get_ftrace_location -static inline unsigned long klp_get_ftrace_location(unsigned long faddr) -{ - /* - * Live patch works only with -mprofile-kernel on PPC. In this case, - * the ftrace location is always within the first 16 bytes. 
- */ - return ftrace_location_range(faddr, faddr + 16); -} - static inline void klp_init_thread_info(struct task_struct *p) { /* + 1 to account for STACK_END_MAGIC */ diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 160abcb8e9fa..ea0e487f87b1 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -9,7 +9,7 @@ long soft_nmi_interrupt(struct pt_regs *regs); static inline void arch_touch_nmi_watchdog(void) {} #endif -#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE) +#ifdef CONFIG_NMI_IPI extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index fa84744d6b24..b876ef8c70a7 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -421,14 +421,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9a492fdec1df..7dae0b01abfb 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -105,6 +105,27 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } +static bool arch_kprobe_on_func_entry(unsigned long offset) +{ +#ifdef PPC64_ELF_ABI_v2 +#ifdef CONFIG_KPROBES_ON_FTRACE + return offset <= 16; +#else + return offset <= 8; +#endif +#else + return !offset; +#endif +} + +/* XXX try and fold the magic of kprobe_lookup_name() in this */ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + *on_func_entry = arch_kprobe_on_func_entry(offset); + return (kprobe_opcode_t *)(addr + offset); +} + void *alloc_insn_page(void) { void *page; @@ -218,19 +239,6 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs kcb->kprobe_saved_msr = regs->msr; } -bool arch_kprobe_on_func_entry(unsigned long offset) -{ -#ifdef PPC64_ELF_ABI_v2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else - return !offset; -#endif -} - void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->link; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a94b0cd0bdc5..bd3734d5be89 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3264,12 +3264,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index b44d6ecdb46e..0aacd7052585 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas 
@@ -2,6 +2,7 @@ menu "CPU errata selection" config RISCV_ERRATA_ALTERNATIVE bool "RISC-V alternative scheme" + depends on !XIP_KERNEL default y help This Kconfig allows the kernel to automatically patch the diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 6ec44a22278a..c112ab2a9052 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -14,8 +14,8 @@ config SOC_SIFIVE select CLK_SIFIVE select CLK_SIFIVE_PRCI select SIFIVE_PLIC - select RISCV_ERRATA_ALTERNATIVE - select ERRATA_SIFIVE + select RISCV_ERRATA_ALTERNATIVE if !XIP_KERNEL + select ERRATA_SIFIVE if !XIP_KERNEL help This enables support for SiFive SoC platform hardware. diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 8a107ed18b0d..7d81102cffd4 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -50,6 +50,12 @@ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c + +# Newer binutils versions default to ISA spec version 20191213 which moves some +# instructions from the I extension to the Zicsr and Zifencei extensions. +toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) +riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei + KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi index 56f57118c633..44d338514761 100644 --- a/arch/riscv/boot/dts/canaan/k210.dtsi +++ b/arch/riscv/boot/dts/canaan/k210.dtsi @@ -113,7 +113,8 @@ compatible = "canaan,k210-plic", "sifive,plic-1.0.0"; reg = <0xC000000 0x4000000>; interrupt-controller; - interrupts-extended = <&cpu0_intc 11>, <&cpu1_intc 11>; + interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>, + <&cpu1_intc 11>, <&cpu1_intc 9>; riscv,ndev = <65>; }; diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 2a82a3b2992b..af64b95e88cc 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -23,7 +23,7 @@ CONFIG_SLOB=y CONFIG_SOC_CANAAN=y CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro" +CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro" CONFIG_CMDLINE_FORCE=y # CONFIG_SECCOMP is not set # CONFIG_STACKPROTECTOR is not set diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 160e3a1e8f8b..004372f8da54 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -119,7 +119,7 @@ extern phys_addr_t phys_ram_base; ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) #define is_linear_mapping(x) \ - ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr)) + ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE)) #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7e949f25c933..e3549e50de95 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -13,6 +13,7 @@ #ifndef CONFIG_MMU #define KERNEL_LINK_ADDR PAGE_OFFSET +#define KERN_VIRT_SIZE (UL(-1)) #else #define ADDRESS_SPACE_END (UL(-1)) 
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 612556faa527..ffc87e76b1dd 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -51,6 +51,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o +obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o + obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index be7f05b542bb..f7a832e3a1d1 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -12,6 +12,7 @@ #include <linux/sched/hotplug.h> #include <asm/irq.h> #include <asm/cpu_ops.h> +#include <asm/numa.h> #include <asm/sbi.h> bool cpu_has_hotplug(unsigned int cpu) @@ -40,6 +41,7 @@ int __cpu_disable(void) return ret; remove_cpu_topology(cpu); + numa_remove_cpu(cpu); set_cpu_online(cpu, false); irq_migrate_all_off_this_cpu(); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index ed29e9c8f660..d6a46ed0bf05 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -108,7 +108,7 @@ _save_context: .option pop #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_CONTEXT_TRACKING @@ -143,7 +143,7 @@ skip_context_tracking: li t0, EXC_BREAKPOINT beq s4, t0, 1f #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on + call __trace_hardirqs_on #endif csrs CSR_STATUS, SR_IE @@ -234,7 +234,7 @@ ret_from_exception: REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_RISCV_M_MODE /* the MPP value is too large to be used as an immediate arg for addi */ @@ -270,10 +270,10 @@ restore_all: REG_L s1, PT_STATUS(sp) andi t0, s1, SR_PIE beqz t0, 1f - call trace_hardirqs_on + call __trace_hardirqs_on j 2f 1: - call trace_hardirqs_off + call __trace_hardirqs_off 2: #endif REG_L a0, PT_STATUS(sp) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 2363b43312fc..ec07f991866a 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -22,14 +22,13 @@ add \reg, \reg, t0 .endm .macro XIP_FIXUP_FLASH_OFFSET reg - la t1, __data_loc - li t0, XIP_OFFSET_MASK - and t1, t1, t0 - li t1, XIP_OFFSET - sub t0, t0, t1 - sub \reg, \reg, t0 + la t0, __data_loc + REG_L t1, _xip_phys_offset + sub \reg, \reg, t1 + add \reg, \reg, t0 .endm _xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET +_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET #else .macro XIP_FIXUP_OFFSET reg .endm diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 68a9e3d1fe16..4a48287513c3 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -13,6 +13,19 @@ #include <linux/pgtable.h> #include <asm/sections.h> +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} + static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { @@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, ptrdiff_t offset = (void *)v - (void *)location; 
s32 hi20; - if (offset != (s32)offset) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { offset = module_emit_plt_entry(me, v); @@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f72527fcb347..775d3322b422 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -5,6 +5,7 @@ * Copyright (c) 2020 Western Digital Corporation or its affiliates. */ +#include <linux/bits.h> #include <linux/init.h> #include <linux/pm.h> #include <linux/reboot.h> @@ -85,7 +86,7 @@ static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mas pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); break; } - hmask |= 1 << hartid; + hmask |= BIT(hartid); } return hmask; @@ -160,7 +161,7 @@ static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) { unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -176,7 +177,7 @@ static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, int result = 0; unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -249,26 +250,37 @@ static void __sbi_set_timer_v02(uint64_t stime_value) static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; struct sbiret ret = {0}; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask, hbase, 0, 0, 0, 0); - if (ret.error) - goto ecall_failed; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + ret = sbi_ecall(SBI_EXT_IPI, + SBI_EXT_IPI_SEND_IPI, hmask, + hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; - hmask |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } if (hmask) { @@ -344,25 +356,35 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { - unsigned long hartid, cpuid, hmask = 0, hbase = 
0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - result = __sbi_rfence_v02_call(fid, hmask, hbase, - start, size, arg4, arg5); - if (result) - return result; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + result = __sbi_rfence_v02_call(fid, hmask, + hbase, start, size, arg4, arg5); + if (result) + return result; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; - hmask |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } if (hmask) { diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 201ee206fb57..14d2b53ec322 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -22,15 +22,16 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int level = 0; if (regs) { fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(1); - sp = (unsigned long)__builtin_frame_address(0); - pc = (unsigned long)__builtin_return_address(0); + fp = (unsigned long)__builtin_frame_address(0); + sp = sp_in_global; + pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -42,7 +43,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, unsigned long low, high; struct stackframe *frame; - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc)))) break; /* Validate frame pointer */ diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c new file mode 100644 index 000000000000..095ac976d7da --- /dev/null +++ b/arch/riscv/kernel/trace_irq.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ + +#include <linux/irqflags.h> +#include <linux/kprobes.h> +#include "trace_irq.h" + +/* + * trace_hardirqs_on/off require the caller to set up the frame pointer properly. + * Otherwise, CALLER_ADDR1 might trigger a paging exception in the kernel. + * Here we add one extra level so they can be safely called by low + * level entry code where $fp is used for other purposes.
+ */ + +void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_on); + +void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h new file mode 100644 index 000000000000..99fe67377e5e --- /dev/null +++ b/arch/riscv/kernel/trace_irq.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ +#ifndef __TRACE_IRQ_H +#define __TRACE_IRQ_H + +void __trace_hardirqs_on(void); +void __trace_hardirqs_off(void); + +#endif /* __TRACE_IRQ_H */ diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 0c5239e05721..624166004e36 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -90,6 +90,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *cntx; + struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; @@ -106,6 +107,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) cntx->hstatus |= HSTATUS_SPVP; cntx->hstatus |= HSTATUS_SPV; + /* By default, make CY, TM, and IR counters accessible in VU mode */ + reset_csr->scounteren = 0x7; + /* Setup VCPU timer */ kvm_riscv_vcpu_timer_init(vcpu); @@ -699,6 +703,20 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) csr_write(CSR_HVIP, csr->hvip); } +/* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. + * + * This must be noinstr as instrumentation may make use of RCU, and this is not + * safe during the EQS. + */ +static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + guest_state_enter_irqoff(); + __kvm_riscv_switch_to(&vcpu->arch); + guest_state_exit_irqoff(); +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { int ret; @@ -790,9 +808,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) continue; } - guest_enter_irqoff(); + guest_timing_enter_irqoff(); - __kvm_riscv_switch_to(&vcpu->arch); + kvm_riscv_vcpu_enter_exit(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -812,25 +830,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_sync_interrupts(vcpu); /* - * We may have taken a host interrupt in VS/VU-mode (i.e. - * while executing the guest). This interrupt is still - * pending, as we haven't serviced it yet! + * We must ensure that any pending interrupts are taken before + * we exit guest timing so that timer ticks are accounted as + * guest time. Transiently unmask interrupts so that any + * pending interrupts are taken. * - * We're now back in HS-mode with interrupts disabled - * so enabling the interrupts now will have the effect - * of taking the interrupt again, in HS-mode this time. + * There's no barrier which ensures that pending interrupts are + * recognised, so we just hope that the CPU takes any pending + * interrupts between the enable and disable. */ local_irq_enable(); + local_irq_disable(); - /* - * We do local_irq_enable() before calling guest_exit() so - * that if a timer interrupt hits while running the guest - * we account that tick as being spent in the guest. We - * enable preemption after calling guest_exit() so that if - * we get preempted we make sure ticks after that is not - * counted as guest time. 
- */ - guest_exit(); + guest_timing_exit_irqoff(); + + local_irq_enable(); preempt_enable(); diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c index 4ecf377f483b..48f431091cdb 100644 --- a/arch/riscv/kvm/vcpu_sbi_base.c +++ b/arch/riscv/kvm/vcpu_sbi_base.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/version.h> #include <asm/csr.h> #include <asm/sbi.h> #include <asm/kvm_vcpu_timer.h> @@ -32,7 +33,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, *out_val = KVM_SBI_IMPID; break; case SBI_EXT_BASE_GET_IMP_VERSION: - *out_val = 0; + *out_val = LINUX_VERSION_CODE; break; case SBI_EXT_BASE_PROBE_EXT: if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START && diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 7ebaef10ea1b..ac7a25298a04 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -24,6 +24,9 @@ obj-$(CONFIG_KASAN) += kasan_init.o ifdef CONFIG_KASAN KASAN_SANITIZE_kasan_init.o := n KASAN_SANITIZE_init.o := n +ifdef CONFIG_DEBUG_VIRTUAL +KASAN_SANITIZE_physaddr.o := n +endif endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index 05978f78579f..35484d830fd6 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -33,7 +33,7 @@ static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset, if (unlikely(offset > MAX_REG_OFFSET)) return; - if (!offset) + if (offset) *(unsigned long *)((unsigned long)regs + offset) = val; } @@ -43,8 +43,8 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); - regs_set_gpr(regs, reg_err, -EFAULT); - regs_set_gpr(regs, reg_zero, 0); + regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT); + regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0); regs->epc = get_ex_fixup(ex); return true; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index cf4d018b7d66..0d588032d6e6 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -125,7 +125,6 @@ void __init mem_init(void) else swiotlb_force = SWIOTLB_NO_FORCE; #endif - high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); memblock_free_all(); print_vm_layout(); @@ -195,6 +194,7 @@ static void __init setup_bootmem(void) min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); + high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); @@ -232,6 +232,7 @@ static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAG #ifdef CONFIG_XIP_KERNEL #define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) +#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base)) #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) @@ -522,6 +523,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) } #ifdef CONFIG_XIP_KERNEL +#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base)) extern char _xiprom[], _exiprom[], __data_loc; /* called from head.S with MMU off */ diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index f61f7ca6fe0f..cd1a145257b7 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c 
@@ -113,8 +113,11 @@ static void __init kasan_populate_pud(pgd_t *pgd, base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); } else { base_pud = (pud_t *)pgd_page_vaddr(*pgd); - if (base_pud == lm_alias(kasan_early_shadow_pud)) + if (base_pud == lm_alias(kasan_early_shadow_pud)) { base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); + memcpy(base_pud, (void *)kasan_early_shadow_pud, + sizeof(pud_t) * PTRS_PER_PUD); + } } pudp = base_pud + pud_index(vaddr); @@ -202,8 +205,7 @@ asmlinkage void __init kasan_early_init(void) for (i = 0; i < PTRS_PER_PTE; ++i) set_pte(kasan_early_shadow_pte + i, - mk_pte(virt_to_page(kasan_early_shadow_page), - PAGE_KERNEL)); + pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL)); for (i = 0; i < PTRS_PER_PMD; ++i) set_pmd(kasan_early_shadow_pmd + i, diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index e7fd0c253c7b..19cf25a74ee2 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -8,12 +8,10 @@ phys_addr_t __virt_to_phys(unsigned long x) { - phys_addr_t y = x - PAGE_OFFSET; - /* * Boundary checking aginst the kernel linear mapping space. */ - WARN(y >= KERN_VIRT_SIZE, + WARN(!is_linear_mapping(x) && !is_kernel_mapping(x), "virt_to_phys used for non-linear address: %pK (%pS)\n", (void *)x, (void *)x); diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 16dc57dd90b3..8511f0e59290 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -69,8 +69,13 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a, { a->fixup = b->fixup + delta; b->fixup = tmp.fixup - delta; - a->handler = b->handler + delta; - b->handler = tmp.handler - delta; + a->handler = b->handler; + if (a->handler) + a->handler += delta; + b->handler = tmp.handler; + if (b->handler) + b->handler -= delta; } +#define swap_ex_entry_fixup swap_ex_entry_fixup #endif diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 267f70f4393f..6f80ec9c04be 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -47,15 +47,17 @@ struct ftrace_regs { static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { - return &fregs->regs; + struct pt_regs *regs = &fregs->regs; + + if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS)) + return regs; + return NULL; } static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, unsigned long ip) { - struct pt_regs *regs = arch_ftrace_get_regs(fregs); - - regs->psw.addr = ip; + fregs->regs.psw.addr = ip; } /* diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 4ffa8e7f0ed3..ddb70fb13fbc 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -15,11 +15,13 @@ #define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */ #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ +#define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) #define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART) #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) +#define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS) #ifndef __ASSEMBLY__ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 21d62d8b6b9a..89c0870d5679 100644 --- a/arch/s390/kernel/ftrace.c +++ 
b/arch/s390/kernel/ftrace.c @@ -159,9 +159,38 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) return 0; } +static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrace *rec) +{ + struct ftrace_hotpatch_trampoline *trampoline; + struct ftrace_insn insn; + s64 disp; + u16 opc; + + if (copy_from_kernel_nofault(&insn, (void *)rec->ip, sizeof(insn))) + return ERR_PTR(-EFAULT); + disp = (s64)insn.disp * 2; + trampoline = (void *)(rec->ip + disp); + if (get_kernel_nofault(opc, &trampoline->brasl_opc)) + return ERR_PTR(-EFAULT); + if (opc != 0xc015) + return ERR_PTR(-EINVAL); + return trampoline; +} + int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { + struct ftrace_hotpatch_trampoline *trampoline; + u64 old; + + trampoline = ftrace_get_trampoline(rec); + if (IS_ERR(trampoline)) + return PTR_ERR(trampoline); + if (get_kernel_nofault(old, &trampoline->interceptor)) + return -EFAULT; + if (old != old_addr) + return -EINVAL; + s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); return 0; } @@ -188,6 +217,12 @@ static void brcl_enable(void *brcl) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { + struct ftrace_hotpatch_trampoline *trampoline; + + trampoline = ftrace_get_trampoline(rec); + if (IS_ERR(trampoline)) + return PTR_ERR(trampoline); + s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); brcl_enable((void *)rec->ip); return 0; } @@ -291,7 +326,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, regs = ftrace_get_regs(fregs); p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) + if (!regs || unlikely(!p) || kprobe_disabled(p)) goto out; if (kprobe_running()) { diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 39bcc0e39a10..a24177dcd12a 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -27,6 +27,7 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) #define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) +#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS) #ifdef __PACK_STACK /* allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 @@ -57,6 +58,14 @@ ENDPROC(ftrace_stub) .if \allregs == 1 stg %r14,(STACK_PTREGS_PSW)(%r15) stosm (STACK_PTREGS_PSW)(%r15),0 +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS +#else + lghi %r14,_PIF_FTRACE_FULL_REGS + stg %r14,STACK_PTREGS_FLAGS(%r15) +#endif + .else + xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15) .endif lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f2c25d113e7b..05327be3a982 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -800,6 +800,8 @@ static void __init check_initrd(void) static void __init reserve_kernel(void) { memblock_reserve(0, STARTUP_NORMAL_OFFSET); + memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); + memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); memblock_reserve(__amode31_base, __eamode31 - __samode31); memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); memblock_reserve(__pa(_stext), _end - _stext); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 577f1ead6a51..2296b1ff1e02 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4667,6 +4667,8 @@ static long 
kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, return -EINVAL; if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) return -E2BIG; + if (!kvm_s390_pv_cpu_is_protected(vcpu)) + return -EINVAL; switch (mop->op) { case KVM_S390_MEMOP_SIDA_READ: diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c index d056baa8fbb0..9894009fc1f2 100644 --- a/arch/s390/lib/test_modules.c +++ b/arch/s390/lib/test_modules.c @@ -5,9 +5,6 @@ #include "test_modules.h" -#define DECLARE_RETURN(i) int test_modules_return_ ## i(void) -REPEAT_10000(DECLARE_RETURN); - /* * Test that modules with many relocations are loaded properly. */ diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h index 43b5e4b4af3e..6371fcf17684 100644 --- a/arch/s390/lib/test_modules.h +++ b/arch/s390/lib/test_modules.h @@ -47,4 +47,7 @@ __REPEAT_10000_1(f, 8); \ __REPEAT_10000_1(f, 9) +#define DECLARE_RETURN(i) int test_modules_return_ ## i(void) +REPEAT_10000(DECLARE_RETURN); + #endif diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index abceeabe29b9..0760e24f2eba 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -424,6 +424,10 @@ void __init check_bugs(void) os_check_bugs(); } +void apply_ibt_endbr(s32 *start, s32 *end) +{ +} + void apply_retpolines(s32 *start, s32 *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f5bd41bf660..0f0672d2c816 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1861,6 +1861,33 @@ config X86_UMIP specific cases in protected and virtual-8086 modes. Emulated results are dummy. +config CC_HAS_IBT + # GCC >= 9 and binutils >= 2.29 + # Retpoline check to work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93654 + # Clang/LLVM >= 14 + # fentry check to work around https://reviews.llvm.org/D111108 + def_bool ((CC_IS_GCC && $(cc-option, -fcf-protection=branch -mindirect-branch-register)) || \ + (CC_IS_CLANG && $(success,echo "void a(void) {}" | $(CC) -Werror $(CLANG_FLAGS) -fcf-protection=branch -mfentry -pg -x c - -c -o /dev/null))) && \ + $(as-instr,endbr64) + +config X86_KERNEL_IBT + prompt "Indirect Branch Tracking" + bool + depends on X86_64 && CC_HAS_IBT && STACK_VALIDATION + help + Build the kernel with support for Indirect Branch Tracking, a + hardware-supported coarse-grain forward-edge Control Flow Integrity + protection. It enforces that all indirect calls must land on + an ENDBR instruction, as such, the compiler will instrument the + code with them to make this happen. + + In addition to building the kernel with IBT, seal all functions that + are not indirect call targets, avoiding them ever becoming one. + + This requires LTO-like objtool runs and will slow down the build. It + does significantly reduce the number of ENDBR instructions in the + kernel image. + config X86_INTEL_MEMORY_PROTECTION_KEYS prompt "Memory Protection Keys" def_bool y @@ -2834,19 +2861,20 @@ config IA32_AOUT help Support old a.out binaries in the 32bit emulation. -config X86_X32 +config X86_X32_ABI bool "x32 ABI for 64-bit mode" depends on X86_64 + # llvm-objcopy does not convert x86_64 .note.gnu.property or + # compressed debug sections to x86_x32 properly: + # https://github.com/ClangBuiltLinux/linux/issues/514 + # https://github.com/ClangBuiltLinux/linux/issues/1141 + depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm) help Include code to run binaries for the x32 native 32-bit ABI for 64-bit processors.
An x32 process gets access to the full 64-bit register file and wide data path while leaving pointers at 32 bits for smaller memory footprint. - You will need a recent binutils (2.22 or later) with - elf32_x86_64 support enabled to compile a kernel with this - option set. - config COMPAT_32 def_bool y depends on IA32_EMULATION || X86_32 @@ -2855,7 +2883,7 @@ config COMPAT_32 config COMPAT def_bool y - depends on IA32_EMULATION || X86_X32 + depends on IA32_EMULATION || X86_X32_ABI if COMPAT config COMPAT_FOR_U64_ALIGNMENT diff --git a/arch/x86/Makefile b/arch/x86/Makefile index e84cdd409b64..63d50f65b828 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -36,7 +36,7 @@ endif # How to compile the 16-bit code. Note we always compile for -march=i386; # that way we can complain to the user if the CPU is insufficient. -REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \ +REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) @@ -62,8 +62,20 @@ export BITS # KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -# Intel CET isn't enabled in the kernel +ifeq ($(CONFIG_X86_KERNEL_IBT),y) +# +# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate +# NOTRACK prefixes. Current generation compilers unconditionally employ NOTRACK +# for jump-tables, as such, disable jump-tables for now. +# +# (jump-tables are implicitly disabled by RETPOLINE) +# +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 +# +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) +else KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) +endif ifeq ($(CONFIG_X86_32),y) BITS := 32 @@ -140,22 +152,6 @@ else KBUILD_CFLAGS += -mcmodel=kernel endif -ifdef CONFIG_X86_X32 - x32_ld_ok := $(call try-run,\ - /bin/echo -e '1: .quad 1b' | \ - $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \ - $(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMP.o" && \ - $(LD) -m elf32_x86_64 "$$TMP.o" -o "$$TMP",y,n) - ifeq ($(x32_ld_ok),y) - CONFIG_X86_X32_ABI := y - KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI - KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI - else - $(warning CONFIG_X86_X32 enabled but no binutils support) - endif -endif -export CONFIG_X86_X32_ABI - # # If the function graph tracer is used with mcount instead of fentry, # '-maccumulate-outgoing-args' is needed to prevent a GCC bug diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 659fad53ca82..3b354eb9516d 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB SYM_FUNC_START(efi32_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) add $0x4, %esp movl 8(%esp), %esi /* save boot_params pointer */ call efi_main /* efi_main returns the possibly relocated address of startup_32 */ jmp *%eax SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) #endif .text diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fd9441f40457..dea95301196b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -535,7 +535,6 @@ SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB .org 0x390 SYM_FUNC_START(efi64_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) and $~0xf, %rsp /* realign the stack */ movq 
%rdx, %rbx /* save boot_params pointer */ call efi_main @@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry) leaq rva(startup_64)(%rax), %rax jmp *%rax SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif .text diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 363699dd7220..837c1e0aa021 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize) #endif - -SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) SYM_FUNC_START_LOCAL(_key_expansion_256a) pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a) add $0x10, TKEYP RET SYM_FUNC_END(_key_expansion_256a) -SYM_FUNC_END_ALIAS(_key_expansion_128) +SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a) SYM_FUNC_START_LOCAL(_key_expansion_192a) pshufd $0b01010101, %xmm1, %xmm1 diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c index f9e2fecdb761..59ae28abe35c 100644 --- a/arch/x86/crypto/blake2s-shash.c +++ b/arch/x86/crypto/blake2s-shash.c @@ -18,12 +18,12 @@ static int crypto_blake2s_update_x86(struct shash_desc *desc, const u8 *in, unsigned int inlen) { - return crypto_blake2s_update(desc, in, inlen, blake2s_compress); + return crypto_blake2s_update(desc, in, inlen, false); } static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) { - return crypto_blake2s_final(desc, out, blake2s_compress); + return crypto_blake2s_final(desc, out, false); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 80c0d22fc42c..ec35915f0901 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -195,6 +195,7 @@ crc_array: .altmacro LABEL crc_ %i .noaltmacro + ENDBR crc32q -i*8(block_0), crc_init crc32q -i*8(block_1), crc1 crc32q -i*8(block_2), crc2 @@ -204,6 +205,7 @@ LABEL crc_ %i .altmacro LABEL crc_ %i .noaltmacro + ENDBR crc32q -i*8(block_0), crc_init crc32q -i*8(block_1), crc1 # SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet @@ -237,6 +239,7 @@ LABEL crc_ %i ################################################################ LABEL crc_ 0 + ENDBR mov tmp, len cmp $128*24, tmp jae full_block diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 466df3e50276..4faac48ebec5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -86,6 +86,7 @@ SYM_CODE_START(entry_SYSCALL_64) UNWIND_HINT_EMPTY + ENDBR swapgs /* tss.sp2 is scratch space. 
*/ @@ -94,6 +95,7 @@ SYM_CODE_START(entry_SYSCALL_64) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -276,6 +278,7 @@ SYM_FUNC_END(__switch_to_asm) .pushsection .text, "ax" SYM_CODE_START(ret_from_fork) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // copy_thread movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -350,6 +353,7 @@ SYM_CODE_END(ret_from_fork) .macro idtentry vector asmsym cfunc has_error_code:req SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + ENDBR ASM_CLAC .if \has_error_code == 0 @@ -417,6 +421,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_mce_db vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -472,6 +477,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_vc vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC /* @@ -533,6 +539,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_df vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=8 + ENDBR ASM_CLAC /* paranoid_entry returns GS information for paranoid_exit in EBX. */ @@ -544,6 +551,9 @@ SYM_CODE_START(\asmsym) movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ call \cfunc + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + jmp paranoid_exit _ASM_NOKPROBE(\asmsym) @@ -564,6 +574,7 @@ __irqentry_text_start: .align 16 .globl __irqentry_text_end __irqentry_text_end: + ANNOTATE_NOENDBR SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) @@ -608,8 +619,8 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi - SWAPGS - INTERRUPT_RETURN + swapgs + jmp .Lnative_iret SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) @@ -626,9 +637,14 @@ SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler. */ - INTERRUPT_RETURN +#ifdef CONFIG_XEN_PV +SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + .byte 0xe9 + .long .Lnative_iret - (. + 4) +#endif -SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) +.Lnative_iret: UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in @@ -640,6 +656,7 @@ SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) #endif SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // exc_double_fault /* * This may fault. Non-paranoid faults on return to userspace are * handled by fixup_bad_iret. These include #SS, #GP, and #NP. @@ -734,6 +751,7 @@ SYM_FUNC_START(asm_load_gs_index) FRAME_BEGIN swapgs .Lgs_change: + ANNOTATE_NOENDBR // error_entry movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE swapgs @@ -804,6 +822,7 @@ SYM_CODE_END(exc_xen_hypervisor_callback) */ SYM_CODE_START(xen_failsafe_callback) UNWIND_HINT_EMPTY + ENDBR movl %ds, %ecx cmpw %cx, 0x10(%rsp) jne 1f @@ -1063,6 +1082,7 @@ SYM_CODE_END(error_return) */ SYM_CODE_START(asm_exc_nmi) UNWIND_HINT_IRET_REGS + ENDBR /* * We allow breakpoints in NMIs. If a breakpoint occurs, then @@ -1310,6 +1330,7 @@ first_nmi: #endif repeat_nmi: + ANNOTATE_NOENDBR // this code /* * If there was a nested NMI, the first NMI's iret will return * here. 
But NMIs are still enabled and we can take another @@ -1338,6 +1359,7 @@ repeat_nmi: .endr subq $(5*8), %rsp end_repeat_nmi: + ANNOTATE_NOENDBR // this code /* * Everything below this point can be preempted by a nested NMI. @@ -1421,6 +1443,7 @@ SYM_CODE_END(asm_exc_nmi) */ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY + ENDBR mov $-ENOSYS, %eax sysretl SYM_CODE_END(ignore_sysret) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0051cf5c792d..4fdb007cddbd 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -48,6 +48,7 @@ */ SYM_CODE_START(entry_SYSENTER_compat) UNWIND_HINT_EMPTY + ENDBR /* Interrupts are off on entry. */ SWAPGS @@ -147,6 +148,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) popfq jmp .Lsysenter_flags_fixed SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // is_sysenter_singlestep SYM_CODE_END(entry_SYSENTER_compat) /* @@ -198,6 +200,7 @@ SYM_CODE_END(entry_SYSENTER_compat) */ SYM_CODE_START(entry_SYSCALL_compat) UNWIND_HINT_EMPTY + ENDBR /* Interrupts are off on entry. */ swapgs @@ -211,6 +214,7 @@ SYM_CODE_START(entry_SYSCALL_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -340,6 +344,7 @@ SYM_CODE_END(entry_SYSCALL_compat) */ SYM_CODE_START(entry_INT80_compat) UNWIND_HINT_EMPTY + ENDBR /* * Interrupts are off on entry. */ diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile index 5b3efed0e4e8..7f3886eeb2ff 100644 --- a/arch/x86/entry/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -67,7 +67,7 @@ uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h -syshdr-$(CONFIG_X86_X32) += syscalls_x32.h +syshdr-$(CONFIG_X86_X32_ABI) += syscalls_x32.h syshdr-$(CONFIG_XEN) += xen-hypercalls.h uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index c91434056c29..a3c7ca876aeb 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4703,6 +4703,19 @@ static __initconst const struct x86_pmu intel_pmu = { .lbr_read = intel_pmu_lbr_read_64, .lbr_save = intel_pmu_lbr_save, .lbr_restore = intel_pmu_lbr_restore, + + /* + * SMM has access to all 4 rings and while traditionally SMM code only + * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM. + * + * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction + * between SMM or not, this results in what should be pure userspace + * counters including SMM data. + * + * This is a clear privilege issue, therefore globally disable + * counting SMM by default. + */ + .attr_freeze_on_smi = 1, }; static __init void intel_clovertown_quirk(void) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 7f406c14715f..2d33bba9a144 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -897,8 +897,9 @@ static void pt_handle_status(struct pt *pt) * means we are already losing data; need to let the decoder * know. 
*/ - if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || - buf->output_off == pt_buffer_region_size(buf)) { + if (!buf->single && + (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || + buf->output_off == pt_buffer_region_size(buf))) { perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_TRUNCATED); advance++; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 58eee6402832..9b10c8c76087 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 84b87538a15d..4d20a293c6fd 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -4,6 +4,7 @@ #include <linux/stringify.h> #include <linux/instrumentation.h> +#include <linux/objtool.h> /* * Despite that some emulators terminate on UD2, we use it for WARN(). @@ -22,7 +23,7 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE -#define _BUG_FLAGS(ins, flags) \ +#define _BUG_FLAGS(ins, flags, extra) \ do { \ asm_inline volatile("1:\t" ins "\n" \ ".pushsection __bug_table,\"aw\"\n" \ @@ -31,7 +32,8 @@ do { \ "\t.word %c1" "\t# bug_entry::line\n" \ "\t.word %c2" "\t# bug_entry::flags\n" \ "\t.org 2b+%c3\n" \ - ".popsection" \ + ".popsection\n" \ + extra \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ @@ -39,14 +41,15 @@ do { \ #else /* !CONFIG_DEBUG_BUGVERBOSE */ -#define _BUG_FLAGS(ins, flags) \ +#define _BUG_FLAGS(ins, flags, extra) \ do { \ asm_inline volatile("1:\t" ins "\n" \ ".pushsection __bug_table,\"aw\"\n" \ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ "\t.word %c0" "\t# bug_entry::flags\n" \ "\t.org 2b+%c1\n" \ - ".popsection" \ + ".popsection\n" \ + extra \ : : "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) @@ -55,7 +58,7 @@ do { \ #else -#define _BUG_FLAGS(ins, flags) asm volatile(ins) +#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins) #endif /* CONFIG_GENERIC_BUG */ @@ -63,8 +66,8 @@ do { \ #define BUG() \ do { \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, 0); \ - unreachable(); \ + _BUG_FLAGS(ASM_UD2, 0, ""); \ + __builtin_unreachable(); \ } while (0) /* @@ -75,9 +78,9 @@ do { \ */ #define __WARN_FLAGS(flags) \ do { \ + __auto_type f = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ - annotate_reachable(); \ + _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \ instrumentation_end(); \ } while (0) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 33d41e350c79..86e5e4e26fcb 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,7 @@ #include <linux/topology.h> #include <linux/nodemask.h> #include <linux/percpu.h> +#include <asm/ibt.h> #ifdef CONFIG_SMP @@ -72,4 +73,7 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c); #else static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} #endif + +extern __noendbr void cet_disable(void); + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 5cd22090e53d..9a59e2e73bee 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ /* FREE! 
( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -387,6 +387,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ #define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 8f28fafa98b3..1231d63f836d 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,8 +56,11 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -/* Force disable because it's broken beyond repair */ -#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 03cb12775043..98938a68251c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -7,6 +7,7 @@ #include <asm/tlb.h> #include <asm/nospec-branch.h> #include <asm/mmu_context.h> +#include <asm/ibt.h> #include <linux/build_bug.h> #include <linux/kernel.h> #include <linux/pgtable.h> @@ -120,8 +121,12 @@ extern asmlinkage u64 __efi_call(void *fp, ...); efi_enter_mm(); \ }) -#define arch_efi_call_virt(p, f, args...) \ - efi_call((void *)p->f, args) \ +#define arch_efi_call_virt(p, f, args...) ({ \ + u64 ret, ibt = ibt_save(); \ + ret = efi_call((void *)p->f, args); \ + ibt_restore(ibt); \ + ret; \ +}) #define arch_efi_call_virt_teardown() \ ({ \ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h new file mode 100644 index 000000000000..689880eca9ba --- /dev/null +++ b/arch/x86/include/asm/ibt.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IBT_H +#define _ASM_X86_IBT_H + +#include <linux/types.h> + +/* + * The rules for enabling IBT are: + * + * - CC_HAS_IBT: the toolchain supports it + * - X86_KERNEL_IBT: it is selected in Kconfig + * - !__DISABLE_EXPORTS: this is regular kernel code + * + * Esp. that latter one is a bit non-obvious, but some code like compressed, + * purgatory, realmode etc.. is built with custom CFLAGS that do not include + * -fcf-protection=branch and things will go *bang*. + * + * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0. 
+ */ +#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS) + +#define HAS_KERNEL_IBT 1 + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_64 +#define ASM_ENDBR "endbr64\n\t" +#else +#define ASM_ENDBR "endbr32\n\t" +#endif + +#define __noendbr __attribute__((nocf_check)) + +static inline __attribute_const__ u32 gen_endbr(void) +{ + u32 endbr; + + /* + * Generate ENDBR64 in a way that is sure to not result in + * an ENDBR64 instruction as immediate. + */ + asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t" + "not %[endbr]\n\t" + : [endbr] "=&r" (endbr) ); + + return endbr; +} + +static inline __attribute_const__ u32 gen_endbr_poison(void) +{ + /* + * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it + * will be unique to (former) ENDBR sites. + */ + return 0x001f0f66; /* osp nopl (%rax) */ +} + +static inline bool is_endbr(u32 val) +{ + if (val == gen_endbr_poison()) + return true; + + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ + return val == gen_endbr(); +} + +extern __noendbr u64 ibt_save(void); +extern __noendbr void ibt_restore(u64 save); + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_64 +#define ENDBR endbr64 +#else +#define ENDBR endbr32 +#endif + +#endif /* __ASSEMBLY__ */ + +#else /* !IBT */ + +#define HAS_KERNEL_IBT 0 + +#ifndef __ASSEMBLY__ + +#define ASM_ENDBR + +#define __noendbr + +static inline bool is_endbr(u32 val) { return false; } + +static inline u64 ibt_save(void) { return 0; } +static inline void ibt_restore(u64 save) { } + +#else /* __ASSEMBLY__ */ + +#define ENDBR + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_X86_KERNEL_IBT */ + +#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT) + +#endif /* _ASM_X86_IBT_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 1345088e9902..7924f27f5c8b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -5,6 +5,8 @@ /* Interrupts/Exceptions */ #include <asm/trapnr.h> +#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) + #ifndef __ASSEMBLY__ #include <linux/entry-common.h> #include <linux/hardirq.h> @@ -480,7 +482,7 @@ __visible noinstr void func(struct pt_regs *regs, \ /* * ASM code to emit the common vector entry stubs where each stub is - * packed into 8 bytes. + * packed into IDT_ALIGN bytes. * * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because * GCC treats the local vector variable as unsigned int and would expand @@ -492,33 +494,33 @@ __visible noinstr void func(struct pt_regs *regs, \ * point is to mask off the bits above bit 7 because the push is sign * extending. */ - .align 8 + .align IDT_ALIGN SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept NR_EXTERNAL_VECTORS UNWIND_HINT_IRET_REGS 0 : + ENDBR .byte 0x6a, vector jmp asm_common_interrupt - nop - /* Ensure that the above is 8 bytes max */ - . = 0b + 8 + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc vector = vector+1 .endr SYM_CODE_END(irq_entries_start) #ifdef CONFIG_X86_LOCAL_APIC - .align 8 + .align IDT_ALIGN SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR .rept NR_SYSTEM_VECTORS UNWIND_HINT_IRET_REGS 0 : + ENDBR .byte 0x6a, vector jmp asm_spurious_interrupt - nop - /* Ensure that the above is 8 bytes max */ - . 
= 0b + 8 + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc vector = vector+1 .endr SYM_CODE_END(spurious_entries_start) @@ -615,6 +617,11 @@ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); #endif +/* #CP */ +#ifdef CONFIG_X86_KERNEL_IBT +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection); +#endif + /* #VC */ #ifdef CONFIG_AMD_MEM_ENCRYPT DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index ae9d40f6c706..63f818aedf77 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -3,6 +3,7 @@ #define _ASM_X86_IRQ_STACK_H #include <linux/ptrace.h> +#include <linux/objtool.h> #include <asm/processor.h> @@ -99,7 +100,8 @@ } #define ASM_CALL_ARG0 \ - "call %P[__func] \n" + "call %P[__func] \n" \ + ASM_REACHABLE #define ASM_CALL_ARG1 \ "movq %[arg1], %%rdi \n" \ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 87761396e8cc..111104d1c2cd 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -141,13 +141,8 @@ static __always_inline void arch_local_irq_restore(unsigned long flags) #ifdef CONFIG_X86_64 #ifdef CONFIG_XEN_PV #define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV -#define INTERRUPT_RETURN \ - ANNOTATE_RETPOLINE_SAFE; \ - ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);", \ - X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;") #else #define SWAPGS swapgs -#define INTERRUPT_RETURN jmp native_iret #endif #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 631d5040b31e..d39e0de06be2 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -82,7 +82,7 @@ KVM_X86_OP_NULL(guest_apic_has_interrupt) KVM_X86_OP(load_eoi_exitmap) KVM_X86_OP(set_virtual_apic_mode) KVM_X86_OP_NULL(set_apic_access_page_addr) -KVM_X86_OP(deliver_posted_interrupt) +KVM_X86_OP(deliver_interrupt) KVM_X86_OP_NULL(sync_pir_to_irr) KVM_X86_OP(set_tss_addr) KVM_X86_OP(set_identity_map_addr) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6e7c545bc7ee..ec9830d2aabf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -703,7 +703,6 @@ struct kvm_vcpu_arch { struct fpu_guest guest_fpu; u64 xcr0; - u64 guest_supported_xcr0; struct kvm_pio_request pio; void *pio_data; @@ -1410,7 +1409,8 @@ struct kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); - int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 030907922bd0..85865f1645bd 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -3,6 +3,7 @@ #define _ASM_X86_LINKAGE_H #include <linux/stringify.h> +#include <asm/ibt.h> #undef notrace #define notrace __attribute__((no_instrument_function)) @@ -34,5 +35,35 @@ #endif /* __ASSEMBLY__ */ +/* SYM_FUNC_START -- use for 
global functions */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + ENDBR + #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3faf0f97edb1..65c3599b8f05 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -360,11 +360,29 @@ #define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c #define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d - #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 +/* Control-flow Enforcement Technology MSRs */ +#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */ +#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */ +#define CET_SHSTK_EN BIT_ULL(0) +#define CET_WRSS_EN BIT_ULL(1) +#define CET_ENDBR_EN BIT_ULL(2) +#define CET_LEG_IW_EN BIT_ULL(3) +#define CET_NO_TRACK_EN BIT_ULL(4) +#define CET_SUPPRESS_DISABLE BIT_ULL(5) +#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9)) +#define CET_SUPPRESS BIT_ULL(10) +#define CET_WAIT_ENDBR BIT_ULL(11) + +#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */ +#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */ +#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */ +#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */ +#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */ + /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f @@ -476,6 +494,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index cc74dc584836..acbaeaf83b61 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -84,7 +84,7 @@ #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else jmp *%\reg #endif @@ -94,7 +94,7 @@ #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; 
call *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else call *%\reg #endif @@ -146,7 +146,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) @@ -176,7 +176,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -188,9 +188,11 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_GENERIC, - SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS_ENHANCED, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, }; /* The indirect branch speculation control variants */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0d76502cc6f5..964442b99245 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -666,6 +666,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ PV_THUNK_NAME(func) ":" \ + ASM_ENDBR \ FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a69012e1903f..7cd2874628a0 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -272,7 +272,6 @@ struct paravirt_patch_template { extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; -extern void (*paravirt_iret)(void); #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a87e7c33d5ac..91d0f93a00c7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -837,7 +837,7 @@ bool xen_set_default_idle(void); #define xen_set_default_idle 0 #endif -void stop_this_cpu(void *dummy); +void __noreturn stop_this_cpu(void *dummy); void microcode_check(void); enum l1tf_mitigations { diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 1474cf96251d..892fd8c3a6f7 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -2,6 +2,8 @@ #ifndef __ASM_QSPINLOCK_PARAVIRT_H #define __ASM_QSPINLOCK_PARAVIRT_H +#include <asm/ibt.h> + /* * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit * registers. For i386, however, only 1 32-bit register needs to be saved @@ -39,6 +41,7 @@ asm (".pushsection .text;" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" PV_UNLOCK ": " + ASM_ENDBR FRAME_BEGIN "push %rdx;" "mov $0x1,%eax;" diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index b228c9d44ee7..656ed6531d03 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -4,6 +4,7 @@ #include <linux/const.h> #include <asm/alternative.h> +#include <asm/ibt.h> /* * Constructor for a conventional segment GDT (or LDT) entry. 
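The is_endbr()/gen_endbr()/gen_endbr_poison() helpers added in asm/ibt.h earlier in this diff can be exercised outside the kernel. The sketch below (x86-only, GCC/Clang inline asm assumed; the my_* names are standalone stand-ins, not kernel symbols) shows how the byte patterns relate: ENDBR64 is f3 0f 1e fa (0xfa1e0ff3 read little-endian), ENDBR32 differs only in bit 24, and the 4-byte poison NOP is what the objtool-driven sealing pass later in the diff pokes over unused ENDBR sites:

#include <stdio.h>
#include <stdint.h>

static uint32_t my_gen_endbr(void)
{
	uint32_t endbr;

	/* Same trick as the kernel helper: build the constant at run time so
	 * the ENDBR byte pattern never appears as an immediate in the text. */
	asm ("mov $~0xfa1e0ff3, %[endbr]\n\t"
	     "not %[endbr]\n\t"
	     : [endbr] "=&r" (endbr));
	return endbr;
}

static uint32_t my_gen_endbr_poison(void)
{
	return 0x001f0f66;	/* osp nopl (%rax): 4-byte filler, unique to sealed sites */
}

static int my_is_endbr(uint32_t val)
{
	if (val == my_gen_endbr_poison())
		return 1;	/* a former (sealed) ENDBR site still counts */
	val &= ~0x01000000U;	/* fold ENDBR32 (f3 0f 1e fb) onto ENDBR64 */
	return val == my_gen_endbr();
}

int main(void)
{
	printf("endbr64 %d\n", my_is_endbr(0xfa1e0ff3));	/* 1 */
	printf("endbr32 %d\n", my_is_endbr(0xfb1e0ff3));	/* 1 */
	printf("poison  %d\n", my_is_endbr(0x001f0f66));	/* 1 */
	printf("nops    %d\n", my_is_endbr(0x90909090));	/* 0 */
	return 0;
}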
@@ -275,7 +276,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * vector has no error code (two bytes), a 'push $vector_number' (two * bytes), and a jump to the common entry code (up to five bytes). */ -#define EARLY_IDT_HANDLER_SIZE 9 +#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE) /* * xen_early_idt_handler_array is for Xen pv guests: for each entry in @@ -283,7 +284,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to * max 8 bytes. */ -#define XEN_EARLY_IDT_HANDLER_SIZE 8 +#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index a12458a7a8d4..896e48d45828 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/page_types.h> +#include <asm/ibt.h> #ifdef __i386__ @@ -119,7 +120,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(".discard.text") __used notrace \ + static void __section(".discard.text") __noendbr __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index b00dbc5fac2b..bb2fb78523ce 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -220,6 +220,42 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) #define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) + +/* AVIC */ +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) + +#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) +#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) +#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) +#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) +#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFF) + +#define AVIC_DOORBELL_PHYSICAL_ID_MASK (0xFF) + +#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 +#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 +#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF + +enum avic_ipi_failure_cause { + AVIC_IPI_FAILURE_INVALID_INT_TYPE, + AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, + AVIC_IPI_FAILURE_INVALID_TARGET, + AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, +}; + + +/* + * 0xff is broadcast, so the max index allowed for physical APIC ID + * table is 0xfe. APIC IDs above 0xff are reserved. + */ +#define AVIC_MAX_PHYSICAL_ID_COUNT 0xff + +#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) +#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL + + struct vmcb_seg { u16 selector; u16 attrib; diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 6a2827d0681f..59358d1bf880 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -159,7 +159,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #endif /* CONFIG_IA32_EMULATION */ -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware * of the x86-64-style parameter ordering of x32 syscalls. 
The syscalls common @@ -177,12 +177,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X32_COMPAT_SYS_NI(name) \ __SYS_NI(x64, compat_sys_##name) -#else /* CONFIG_X86_X32 */ +#else /* CONFIG_X86_X32_ABI */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) #define __X32_COMPAT_SYS_NI(name) -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ #ifdef CONFIG_COMPAT diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index b7421780e4e9..c6015b407461 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -96,24 +96,40 @@ union text_poke_insn { }; static __always_inline -void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size) { - static union text_poke_insn insn; /* per instance */ - int size = text_opcode_size(opcode); + union text_poke_insn *insn = buf; + + BUG_ON(size < text_opcode_size(opcode)); + + /* + * Hide the addresses to avoid the compiler folding in constants when + * referencing code, these can mess up annotations like + * ANNOTATE_NOENDBR. + */ + OPTIMIZER_HIDE_VAR(insn); + OPTIMIZER_HIDE_VAR(addr); + OPTIMIZER_HIDE_VAR(dest); - insn.opcode = opcode; + insn->opcode = opcode; if (size > 1) { - insn.disp = (long)dest - (long)(addr + size); + insn->disp = (long)dest - (long)(addr + size); if (size == 2) { /* - * Ensure that for JMP9 the displacement + * Ensure that for JMP8 the displacement * actually fits the signed byte. */ - BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); + BUG_ON((insn->disp >> 31) != (insn->disp >> 7)); } } +} +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode)); return &insn.text; } diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 6221be7cafc3..35317c5c551d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -18,6 +18,8 @@ void __init trap_init(void); asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif +extern bool ibt_selftest(void); + #ifdef CONFIG_X86_F00F_BUG /* For handling the FOOF bug */ void handle_invalid_op(struct pt_regs *regs); diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 98aa103eb4ab..2963a2f5dbc4 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -37,7 +37,7 @@ struct vdso_image { extern const struct vdso_image vdso_image_64; #endif -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI extern const struct vdso_image vdso_image_x32; #endif diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h index a9630104f1c4..78e667a31d6c 100644 --- a/arch/x86/include/asm/xen/cpuid.h +++ b/arch/x86/include/asm/xen/cpuid.h @@ -100,6 +100,13 @@ /* Memory mapped from other domains has valid IOMMU entries */ #define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) #define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */ +#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */ +/* + * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be + * used to store high bits for the Destination ID. This expands the Destination + * ID field from 8 to 15 bits, allowing to target APIC IDs up 32768. 
+ */ +#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5) /* * Leaf 6 (0x40000x05) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 1bf2ad34188a..16f548a661cf 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -43,20 +43,6 @@ static inline uint32_t xen_cpuid_base(void) return hypervisor_cpuid_base("XenVMMXenVMM", 2); } -#ifdef CONFIG_XEN -extern bool __init xen_hvm_need_lapic(void); - -static inline bool __init xen_x2apic_para_available(void) -{ - return xen_hvm_need_lapic(); -} -#else -static inline bool __init xen_x2apic_para_available(void) -{ - return (xen_cpuid_base() != 0); -} -#endif - struct pci_dev; #ifdef CONFIG_XEN_PV_DOM0 diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index bcba3c643e63..c47cc7f2feeb 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -130,6 +130,8 @@ #define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT) #define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */ #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) +#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ +#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5007c3ffe96f..a79196fd364f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -389,7 +390,7 @@ static int emit_indirect(int op, int reg, u8 *bytes) * * CALL *%\reg * - * It also tries to inline spectre_v2=retpoline,amd when size permits. + * It also tries to inline spectre_v2=retpoline,lfence when size permits. */ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) { @@ -407,7 +408,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) BUG_ON(reg == 4); if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && - !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) + !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) return -1; op = insn->opcode.bytes[0]; @@ -438,9 +439,9 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) } /* - * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE. + * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE. 
*/ - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) { + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { bytes[i++] = 0x0f; bytes[i++] = 0xae; bytes[i++] = 0xe8; /* LFENCE */ @@ -512,6 +513,42 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ +#ifdef CONFIG_X86_KERNEL_IBT + +/* + * Generated by: objtool --ibt + */ +void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + u32 endbr, poison = gen_endbr_poison(); + void *addr = (void *)s + *s; + + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) + continue; + + if (WARN_ON_ONCE(!is_endbr(endbr))) + continue; + + DPRINTK("ENDBR at: %pS (%px)", addr, addr); + + /* + * When we have IBT, the lack of ENDBR will trigger #CP + */ + DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); + DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); + text_poke_early(addr, &poison, 4); + } +} + +#else + +void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { } + +#endif /* CONFIG_X86_KERNEL_IBT */ + #ifdef CONFIG_SMP static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) @@ -713,34 +750,39 @@ asm ( " .pushsection .init.text, \"ax\", @progbits\n" " .type int3_magic, @function\n" "int3_magic:\n" + ANNOTATE_NOENDBR " movl $1, (%" _ASM_ARG1 ")\n" ASM_RET " .size int3_magic, .-int3_magic\n" " .popsection\n" ); -extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */ +extern void int3_selftest_ip(void); /* defined in asm below */ static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) { + unsigned long selftest = (unsigned long)&int3_selftest_ip; struct die_args *args = data; struct pt_regs *regs = args->regs; + OPTIMIZER_HIDE_VAR(selftest); + if (!regs || user_mode(regs)) return NOTIFY_DONE; if (val != DIE_INT3) return NOTIFY_DONE; - if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip) + if (regs->ip - INT3_INSN_SIZE != selftest) return NOTIFY_DONE; int3_emulate_call(regs, (unsigned long)&int3_magic); return NOTIFY_STOP; } -static void __init int3_selftest(void) +/* Must be noinline to ensure uniqueness of int3_selftest_ip. */ +static noinline void __init int3_selftest(void) { static __initdata struct notifier_block int3_exception_nb = { .notifier_call = int3_exception_notify, @@ -753,18 +795,12 @@ static void __init int3_selftest(void) /* * Basically: int3_magic(&val); but really complicated :-) * - * Stick the address of the INT3 instruction into int3_selftest_ip, - * then trigger the INT3, padded with NOPs to match a CALL instruction - * length. + * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb + * notifier above will emulate CALL for us. */ - asm volatile ("1: int3; nop; nop; nop; nop\n\t" - ".pushsection .init.data,\"aw\"\n\t" - ".align " __ASM_SEL(4, 8) "\n\t" - ".type int3_selftest_ip, @object\n\t" - ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t" - "int3_selftest_ip:\n\t" - __ASM_SEL(.long, .quad) " 1b\n\t" - ".popsection\n\t" + asm volatile ("int3_selftest_ip:\n\t" + ANNOTATE_NOENDBR + " int3; nop; nop; nop; nop\n\t" : ASM_CALL_CONSTRAINT : __ASM_SEL_RAW(a, D) (&val) : "memory"); @@ -831,6 +867,8 @@ void __init alternative_instructions(void) */ apply_alternatives(__alt_instructions, __alt_instructions_end); + apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. 
*/ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 241dda687eb9..60e330cdbd17 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -232,6 +232,7 @@ #include <asm/paravirt.h> #include <asm/reboot.h> #include <asm/nospec-branch.h> +#include <asm/ibt.h> #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -598,6 +599,7 @@ static long __apm_bios_call(void *_call) struct desc_struct save_desc_40; struct desc_struct *gdt; struct apm_bios_call *call = _call; + u64 ibt; cpu = get_cpu(); BUG_ON(cpu != 0); @@ -607,11 +609,13 @@ static long __apm_bios_call(void *_call) apm_irq_save(flags); firmware_restrict_branch_speculation_start(); + ibt = ibt_save(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + ibt_restore(ibt); firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; @@ -676,6 +680,7 @@ static long __apm_bios_call_simple(void *_call) struct desc_struct save_desc_40; struct desc_struct *gdt; struct apm_bios_call *call = _call; + u64 ibt; cpu = get_cpu(); BUG_ON(cpu != 0); @@ -685,10 +690,12 @@ static long __apm_bios_call_simple(void *_call) apm_irq_save(flags); firmware_restrict_branch_speculation_start(); + ibt = ibt_save(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + ibt_restore(ibt); firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1c1f218a701d..6296e1ebed1d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ #include <linux/prctl.h> #include <linux/sched/smt.h> #include <linux/pgtable.h> +#include <linux/bpf.h> #include <asm/spec-ctrl.h> #include <asm/cmdline.h> @@ -650,6 +651,32 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" +#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" + +#ifdef CONFIG_BPF_SYSCALL +void unpriv_ebpf_notify(int new_state) +{ + if (new_state) + return; + + /* Unprivileged eBPF is enabled */ + + switch (spectre_v2_enabled) { + case SPECTRE_V2_EIBRS: + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + break; + case SPECTRE_V2_EIBRS_LFENCE: + if (sched_smt_active()) + pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + break; + default: + break; + } +} +#endif + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -664,7 +691,10 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_FORCE, SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, + SPECTRE_V2_CMD_RETPOLINE_LFENCE, + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, }; enum spectre_v2_user_cmd { @@ -737,6 +767,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return 
SPECTRE_V2_USER_CMD_AUTO; } +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +{ + return (mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE); +} + static void __init spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { @@ -804,7 +841,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return; /* @@ -824,9 +861,11 @@ set_mode: static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", + [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", }; static const struct { @@ -837,8 +876,12 @@ static const struct { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, + { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "eibrs", SPECTRE_V2_CMD_EIBRS, false }, + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, }; @@ -875,10 +918,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || - cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || - cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_EIBRS || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { + pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) && + !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. 
Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -887,6 +950,16 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) +{ + if (!IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("Kernel not compiled with retpoline; no mitigation available!"); + return SPECTRE_V2_NONE; + } + + return SPECTRE_V2_RETPOLINE; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -907,49 +980,64 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - mode = SPECTRE_V2_IBRS_ENHANCED; - /* Force it so VMEXIT will restore correctly */ - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - goto specv2_set_mode; + mode = SPECTRE_V2_EIBRS; + break; } - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + + mode = spectre_v2_select_retpoline(); break; - case SPECTRE_V2_CMD_RETPOLINE_AMD: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_amd; + + case SPECTRE_V2_CMD_RETPOLINE_LFENCE: + pr_err(SPECTRE_V2_LFENCE_MSG); + mode = SPECTRE_V2_LFENCE; break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_generic; + mode = SPECTRE_V2_RETPOLINE; break; + case SPECTRE_V2_CMD_RETPOLINE: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + mode = spectre_v2_select_retpoline(); + break; + + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; + + case SPECTRE_V2_CMD_EIBRS_LFENCE: + mode = SPECTRE_V2_EIBRS_LFENCE; + break; + + case SPECTRE_V2_CMD_EIBRS_RETPOLINE: + mode = SPECTRE_V2_EIBRS_RETPOLINE; break; } - pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); - return; -retpoline_auto: - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - retpoline_amd: - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); - goto retpoline_generic; - } - mode = SPECTRE_V2_RETPOLINE_AMD; - setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); - } else { - retpoline_generic: - mode = SPECTRE_V2_RETPOLINE_GENERIC; + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + } + + switch (mode) { + case SPECTRE_V2_NONE: + case SPECTRE_V2_EIBRS: + break; + + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); + fallthrough; + + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + break; } -specv2_set_mode: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -975,7 +1063,7 @@ specv2_set_mode: * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. 
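/*
 * Illustrative sketch of the new "spectre_v2=auto"/"on" decision flow,
 * condensed from the switch statement above: prefer eIBRS when the CPU
 * advertises it, otherwise fall back to a generic retpoline if the
 * kernel was built with CONFIG_RETPOLINE, otherwise remain vulnerable.
 * sketch_pick_auto_mode() and its boolean parameters are simplified
 * stand-ins for the real feature checks.
 */
enum sketch_spectre_v2_mode {
	SKETCH_NONE,
	SKETCH_RETPOLINE,
	SKETCH_EIBRS,
};

static enum sketch_spectre_v2_mode sketch_pick_auto_mode(int has_eibrs,
							  int built_with_retpoline)
{
	if (has_eibrs)
		return SKETCH_EIBRS;		/* boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) */
	if (built_with_retpoline)
		return SKETCH_RETPOLINE;	/* spectre_v2_select_retpoline() */
	return SKETCH_NONE;			/* no mitigation available */
}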
*/ - if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } @@ -1045,6 +1133,10 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; @@ -1684,7 +1776,7 @@ static ssize_t tsx_async_abort_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { @@ -1714,6 +1806,27 @@ static char *ibpb_state(void) return ""; } +static ssize_t spectre_v2_show_state(char *buf) +{ + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) + return sprintf(buf, "Vulnerable: LFENCE\n"); + + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + + return sprintf(buf, "%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + spectre_v2_module_string()); +} + static ssize_t srbds_show_state(char *buf) { return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); @@ -1739,12 +1852,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - spectre_v2_module_string()); + return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64deb7727d00..ed4417500700 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,6 +59,7 @@ #include <asm/cpu_device_id.h> #include <asm/uv/uv.h> #include <asm/sigframe.h> +#include <asm/traps.h> #include "cpu.h" @@ -438,7 +439,8 @@ out: /* These bits should not change their value after CPU init is finished. 
*/ static const unsigned long cr4_pinned_mask = - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | + X86_CR4_FSGSBASE | X86_CR4_CET; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -592,6 +594,58 @@ static __init int setup_disable_pku(char *arg) __setup("nopku", setup_disable_pku); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_KERNEL_IBT + +__noendbr u64 ibt_save(void) +{ + u64 msr = 0; + + if (cpu_feature_enabled(X86_FEATURE_IBT)) { + rdmsrl(MSR_IA32_S_CET, msr); + wrmsrl(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN); + } + + return msr; +} + +__noendbr void ibt_restore(u64 save) +{ + u64 msr; + + if (cpu_feature_enabled(X86_FEATURE_IBT)) { + rdmsrl(MSR_IA32_S_CET, msr); + msr &= ~CET_ENDBR_EN; + msr |= (save & CET_ENDBR_EN); + wrmsrl(MSR_IA32_S_CET, msr); + } +} + +#endif + +static __always_inline void setup_cet(struct cpuinfo_x86 *c) +{ + u64 msr = CET_ENDBR_EN; + + if (!HAS_KERNEL_IBT || + !cpu_feature_enabled(X86_FEATURE_IBT)) + return; + + wrmsrl(MSR_IA32_S_CET, msr); + cr4_set_bits(X86_CR4_CET); + + if (!ibt_selftest()) { + pr_err("IBT selftest: Failed!\n"); + setup_clear_cpu_cap(X86_FEATURE_IBT); + return; + } +} + +__noendbr void cet_disable(void) +{ + if (cpu_feature_enabled(X86_FEATURE_IBT)) + wrmsrl(MSR_IA32_S_CET, 0); +} + /* * Some CPU features depend on higher CPUID levels, which may not always * be available due to CPUID level capping or broken virtualization @@ -1709,6 +1763,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) x86_init_rdrand(c); setup_pku(c); + setup_cet(c); /* * Clear/Set all flags overridden by options, need do it @@ -1777,6 +1832,8 @@ void enable_sep_cpu(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); + if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) + pr_info("CET detected: Indirect Branch Tracking enabled\n"); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 001808e3901c..7c63a1911fae 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -13,6 +13,30 @@ #include "sgx.h" /* + * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's + * follow right after the EPC data in the backing storage. In addition to the + * visible enclave pages, there's one extra page slot for SECS, before PCMD + * structs. + */ +static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl, + unsigned long page_index) +{ + pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs); + + return epc_end_off + page_index * sizeof(struct sgx_pcmd); +} + +/* + * Free a page from the backing storage in the given page index. + */ +static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index) +{ + struct inode *inode = file_inode(encl->backing); + + shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1); +} + +/* * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC * Pages" in the SDM. 
*/ @@ -22,9 +46,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, { unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; struct sgx_encl *encl = encl_page->encl; + pgoff_t page_index, page_pcmd_off; struct sgx_pageinfo pginfo; struct sgx_backing b; - pgoff_t page_index; + bool pcmd_page_empty; + u8 *pcmd_page; int ret; if (secs_page) @@ -32,14 +58,16 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, else page_index = PFN_DOWN(encl->size); + page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); + ret = sgx_encl_get_backing(encl, page_index, &b); if (ret) return ret; pginfo.addr = encl_page->desc & PAGE_MASK; pginfo.contents = (unsigned long)kmap_atomic(b.contents); - pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) + - b.pcmd_offset; + pcmd_page = kmap_atomic(b.pcmd); + pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset; if (secs_page) pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page); @@ -55,11 +83,24 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, ret = -EFAULT; } - kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset)); + memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd)); + + /* + * The area for the PCMD in the page was zeroed above. Check if the + * whole page is now empty meaning that all PCMD's have been zeroed: + */ + pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE); + + kunmap_atomic(pcmd_page); kunmap_atomic((void *)(unsigned long)pginfo.contents); sgx_encl_put_backing(&b, false); + sgx_encl_truncate_backing_page(encl, page_index); + + if (pcmd_page_empty) + sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off)); + return ret; } @@ -410,6 +451,8 @@ void sgx_encl_release(struct kref *ref) } kfree(entry); + /* Invoke scheduler to prevent soft lockups. 
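/*
 * Illustrative sketch of the backing-store layout described by
 * sgx_encl_get_backing_page_pcmd_offset() above: encl->size bytes of
 * page data first, then one SECS-sized slot, then a dense array of PCMD
 * structures.  Struct sizes are passed in symbolically and the function
 * name is a stand-in, not kernel code.
 */
static unsigned long sketch_pcmd_byte_offset(unsigned long encl_size,
					     unsigned long secs_bytes,
					     unsigned long pcmd_bytes,
					     unsigned long page_index)
{
	/* byte offset of this page's PCMD entry inside the backing file */
	unsigned long off = encl_size + secs_bytes + page_index * pcmd_bytes;

	/*
	 * The caller later splits this into a backing page (PFN_DOWN(off))
	 * and an offset within that page (off & (PAGE_SIZE - 1)), which is
	 * what sgx_encl_get_backing() stores as pcmd_offset.
	 */
	return off;
}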
*/ + cond_resched(); } xa_destroy(&encl->page_array); @@ -577,7 +620,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing) { - pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5); + pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); struct page *contents; struct page *pcmd; @@ -585,7 +628,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, if (IS_ERR(contents)) return PTR_ERR(contents); - pcmd = sgx_encl_get_backing_page(encl, pcmd_index); + pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off)); if (IS_ERR(pcmd)) { put_page(contents); return PTR_ERR(pcmd); @@ -594,9 +637,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, backing->page_index = page_index; backing->contents = contents; backing->pcmd = pcmd; - backing->pcmd_offset = - (page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) * - sizeof(struct sgx_pcmd); + backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1); return 0; } diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 4b41efc9e367..8e4bc6453d26 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -344,10 +344,8 @@ static void sgx_reclaim_pages(void) { struct sgx_epc_page *chunk[SGX_NR_TO_SCAN]; struct sgx_backing backing[SGX_NR_TO_SCAN]; - struct sgx_epc_section *section; struct sgx_encl_page *encl_page; struct sgx_epc_page *epc_page; - struct sgx_numa_node *node; pgoff_t page_index; int cnt = 0; int ret; @@ -418,13 +416,7 @@ skip: kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; - section = &sgx_epc_sections[epc_page->section]; - node = section->node; - - spin_lock(&node->lock); - list_add_tail(&epc_page->list, &node->free_page_list); - spin_unlock(&node->lock); - atomic_long_inc(&sgx_nr_free_pages); + sgx_free_epc_page(epc_page); } } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index bc0657f0deed..f267205f2d5a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -995,8 +995,10 @@ early_param("memmap", parse_memmap_opt); */ void __init e820__reserve_setup_data(void) { + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; + u32 len; pa_data = boot_params.hdr.setup_data; if (!pa_data) @@ -1004,6 +1006,14 @@ void __init e820__reserve_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); + if (!data) { + pr_warn("e820: failed to memremap setup_data entry\n"); + return; + } + + len = sizeof(*data); + pa_next = data->next; + e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* @@ -1015,18 +1025,27 @@ void __init e820__reserve_setup_data(void) sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - e820__range_update(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_kexec(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + if (data->type == SETUP_INDIRECT) { + len += data->len; + early_memunmap(data, sizeof(*data)); + data = early_memremap(pa_data, len); + if (!data) { + 
pr_warn("e820: failed to memremap indirect setup_data\n"); + return; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + e820__range_update(indirect->addr, indirect->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + e820__range_update_kexec(indirect->addr, indirect->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + } } - pa_data = data->next; - early_memunmap(data, sizeof(*data)); + pa_data = pa_next; + early_memunmap(data, len); } e820__update_table(e820_table); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8dea01ffc5c1..19821f027cb3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags) fpu_inherit_perms(dst_fpu); fpregs_unlock(); + /* + * Children never inherit PASID state. + * Force it to have its init value: + */ + if (use_xsave()) + dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID; + trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 437d7c930c0b..75ffaef8c299 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - struct user32_fxsr_struct newstate; + struct fxregs_state newstate; int ret; - BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state)); - if (!cpu_feature_enabled(X86_FEATURE_FXSR)) return -ENODEV; @@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, /* Copy the state */ memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate)); - /* Clear xmm8..15 */ + /* Clear xmm8..15 for 32-bit callers */ BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16); - memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16); + if (in_ia32_syscall()) + memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16); /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 02b3ddaf4f75..7c7824ae7862 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1558,7 +1558,10 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; - newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + + if (!guest_fpu) + newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; + newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 7cc540e6de0c..1e31c7d21597 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -316,12 +316,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long offset; unsigned long npages; unsigned long size; - unsigned long retq; unsigned long *ptr; void *trampoline; void *ip; /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */ unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; + unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; union ftrace_op_code_union op_ptr; int ret; @@ -359,12 +359,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - - /* The trampoline ends with ret(q) */ - retq 
= (unsigned long)ftrace_stub; - ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); - if (WARN_ON(ret < 0)) - goto fail; + memcpy(ip, retq, RET_SIZE); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 11ac028e30e4..4ec13608d3c6 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -145,6 +145,7 @@ SYM_FUNC_START(ftrace_caller) movq %rcx, RSP(%rsp) SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -155,6 +156,7 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) movq $0, CS(%rsp) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) + ANNOTATE_NOENDBR call ftrace_stub /* Handlers can change the RIP */ @@ -169,6 +171,7 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * layout here. */ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jmp ftrace_epilogue SYM_FUNC_END(ftrace_caller); @@ -176,10 +179,10 @@ SYM_FUNC_END(ftrace_caller); SYM_FUNC_START(ftrace_epilogue) /* * This is weak to keep gas from relaxing the jumps. - * It is also used to copy the RET for trampolines. */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) UNWIND_HINT_FUNC + ENDBR RET SYM_FUNC_END(ftrace_epilogue) @@ -192,6 +195,7 @@ SYM_FUNC_START(ftrace_regs_caller) /* save_mcount_regs fills in first two parameters */ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -221,6 +225,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) leaq (%rsp), %rcx SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) + ANNOTATE_NOENDBR call ftrace_stub /* Copy flags back to SS, to restore them */ @@ -248,6 +253,7 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) */ testq %rax, %rax SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jnz 1f restore_mcount_regs @@ -261,6 +267,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) * to the return. */ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jmp ftrace_epilogue /* Swap the flags with orig_rax */ @@ -284,6 +291,7 @@ SYM_FUNC_START(__fentry__) jnz trace SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) + ENDBR RET trace: @@ -307,7 +315,7 @@ EXPORT_SYMBOL(__fentry__) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_FUNC_START(return_to_handler) - subq $24, %rsp + subq $16, %rsp /* Save the return values */ movq %rax, (%rsp) @@ -319,7 +327,19 @@ SYM_FUNC_START(return_to_handler) movq %rax, %rdi movq 8(%rsp), %rdx movq (%rsp), %rax - addq $24, %rsp - JMP_NOSPEC rdi + + addq $16, %rsp + /* + * Jump back to the old return address. This cannot be JMP_NOSPEC rdi + * since IBT would demand that contain ENDBR, which simply isn't so for + * return addresses. Use a retpoline here to keep the RSB balanced. + */ + ANNOTATE_INTRA_FUNCTION_CALL + call .Ldo_rop + int3 +.Ldo_rop: + mov %rdi, (%rsp) + UNWIND_HINT_FUNC + RET SYM_FUNC_END(return_to_handler) #endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9c63fc5988cd..b8e3019547a5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -99,6 +99,7 @@ SYM_CODE_END(startup_64) SYM_CODE_START(secondary_startup_64) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. 
@@ -127,6 +128,7 @@ SYM_CODE_START(secondary_startup_64) */ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * Retrieve the modifier (SME encryption mask if SME is active) to be @@ -192,6 +194,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) jmp *%rax 1: UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // above /* * We must switch to a new descriptor in kernel space for the GDT @@ -299,6 +302,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) pushq %rax # target address in negative space lretq .Lafter_lret: + ANNOTATE_NOENDBR SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" @@ -328,6 +332,7 @@ SYM_CODE_END(start_cpu0) */ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ENDBR /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -345,7 +350,6 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) /* Remove Error Code */ addq $8, %rsp - /* Pure iret required here - don't use INTERRUPT_RETURN */ iretq SYM_CODE_END(vc_boot_ghcb) #endif @@ -372,9 +376,11 @@ SYM_CODE_START(early_idt_handler_array) .rept NUM_EXCEPTION_VECTORS .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 UNWIND_HINT_IRET_REGS + ENDBR pushq $0 # Dummy error code, to make stack frame uniform .else UNWIND_HINT_IRET_REGS offset=8 + ENDBR .endif pushq $i # 72(%rsp) Vector number jmp early_idt_handler_common @@ -382,10 +388,11 @@ SYM_CODE_START(early_idt_handler_array) i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr - UNWIND_HINT_IRET_REGS offset=16 SYM_CODE_END(early_idt_handler_array) + ANNOTATE_NOENDBR // early_idt_handler_array[NUM_EXCEPTION_VECTORS] SYM_CODE_START_LOCAL(early_idt_handler_common) + UNWIND_HINT_IRET_REGS offset=16 /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -426,11 +433,14 @@ SYM_CODE_END(early_idt_handler_common) * early_idt_handler_array can't be used because it returns via the * paravirtualized INTERRUPT_RETURN and pv-ops don't work that early. * + * XXX it does, fix this. + * * This handler will end up in the .init.text section and not be * available to boot secondary CPUs. */ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ENDBR /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index df0fa695bb09..608eb63bf044 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -10,6 +10,7 @@ #include <asm/proto.h> #include <asm/desc.h> #include <asm/hw_irq.h> +#include <asm/idtentry.h> #define DPL0 0x0 #define DPL3 0x3 @@ -103,6 +104,10 @@ static const __initconst struct idt_data def_idts[] = { ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), #endif +#ifdef CONFIG_X86_KERNEL_IBT + INTG(X86_TRAP_CP, asm_exc_control_protection), +#endif + #ifdef CONFIG_AMD_MEM_ENCRYPT ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC), #endif @@ -272,7 +277,7 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { - entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + entry = irq_entries_start + IDT_ALIGN * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } @@ -283,7 +288,7 @@ void __init idt_setup_apic_and_irq_gates(void) * system_vectors bitmap. Otherwise they show up in * /proc/interrupts. 
*/ - entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); + entry = spurious_entries_start + IDT_ALIGN * (i - FIRST_SYSTEM_VECTOR); set_intr_gate(i, entry); } #endif diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 64b6da95af98..e2e89bebcbc3 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -88,11 +88,13 @@ create_setup_data_node(struct dentry *parent, int no, static int __init create_setup_data_nodes(struct dentry *parent) { + struct setup_indirect *indirect; struct setup_data_node *node; struct setup_data *data; - int error; + u64 pa_data, pa_next; struct dentry *d; - u64 pa_data; + int error; + u32 len; int no = 0; d = debugfs_create_dir("setup_data", parent); @@ -112,12 +114,29 @@ static int __init create_setup_data_nodes(struct dentry *parent) error = -ENOMEM; goto err_dir; } - - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - node->paddr = ((struct setup_indirect *)data->data)->addr; - node->type = ((struct setup_indirect *)data->data)->type; - node->len = ((struct setup_indirect *)data->data)->len; + pa_next = data->next; + + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(pa_data, len, MEMREMAP_WB); + if (!data) { + kfree(node); + error = -ENOMEM; + goto err_dir; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + node->paddr = indirect->addr; + node->type = indirect->type; + node->len = indirect->len; + } else { + node->paddr = pa_data; + node->type = data->type; + node->len = data->len; + } } else { node->paddr = pa_data; node->type = data->type; @@ -125,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) } create_setup_data_node(d, no, node); - pa_data = data->next; + pa_data = pa_next; memunmap(data); no++; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6290712cb36d..8ef933c03afa 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ #include <asm/insn.h> #include <asm/debugreg.h> #include <asm/set_memory.h> +#include <asm/ibt.h> #include "common.h" @@ -193,17 +194,10 @@ static unsigned long __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; - unsigned long faddr; + bool faddr; kp = get_kprobe((void *)addr); - faddr = ftrace_location(addr); - /* - * Addresses inside the ftrace location are refused by - * arch_check_ftrace_location(). Something went terribly wrong - * if such an address is checked here. - */ - if (WARN_ON(faddr && faddr != addr)) - return 0UL; + faddr = ftrace_location(addr) == addr; /* * Use the current code if it is not modified by Kprobe * and it cannot be modified by ftrace. @@ -301,6 +295,22 @@ static int can_probe(unsigned long paddr) return (addr == paddr); } +/* If x86 supports IBT (ENDBR) it must be skipped. 
*/ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + if (is_endbr(*(u32 *)addr)) { + *on_func_entry = !offset || offset == 4; + if (*on_func_entry) + offset = 4; + + } else { + *on_func_entry = !offset; + } + + return (kprobe_opcode_t *)(addr + offset); +} + /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative @@ -1023,6 +1033,7 @@ asm( ".type __kretprobe_trampoline, @function\n" "__kretprobe_trampoline:\n" #ifdef CONFIG_X86_64 + ANNOTATE_NOENDBR /* Push a fake return address to tell the unwinder it's a kretprobe. */ " pushq $__kretprobe_trampoline\n" UNWIND_HINT_FUNC diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index d0a19121c6a4..257892fcefa7 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -91,26 +91,41 @@ static int get_setup_data_paddr(int nr, u64 *paddr) static int __init get_setup_data_size(int nr, size_t *size) { - int i = 0; + u64 pa_data = boot_params.hdr.setup_data, pa_next; + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data = boot_params.hdr.setup_data; + int i = 0; + u32 len; while (pa_data) { data = memremap(pa_data, sizeof(*data), MEMREMAP_WB); if (!data) return -ENOMEM; + pa_next = data->next; + if (nr == i) { - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) - *size = ((struct setup_indirect *)data->data)->len; - else + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(pa_data, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) + *size = indirect->len; + else + *size = data->len; + } else { *size = data->len; + } memunmap(data); return 0; } - pa_data = data->next; + pa_data = pa_next; memunmap(data); i++; } @@ -120,9 +135,11 @@ static int __init get_setup_data_size(int nr, size_t *size) static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { + struct setup_indirect *indirect; + struct setup_data *data; int nr, ret; u64 paddr; - struct setup_data *data; + u32 len; ret = kobj_to_setup_data_nr(kobj, &nr); if (ret) @@ -135,10 +152,20 @@ static ssize_t type_show(struct kobject *kobj, if (!data) return -ENOMEM; - if (data->type == SETUP_INDIRECT) - ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type); - else + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(paddr, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + ret = sprintf(buf, "0x%x\n", indirect->type); + } else { ret = sprintf(buf, "0x%x\n", data->type); + } + memunmap(data); return ret; } @@ -149,9 +176,10 @@ static ssize_t setup_data_data_read(struct file *fp, char *buf, loff_t off, size_t count) { + struct setup_indirect *indirect; + struct setup_data *data; int nr, ret = 0; u64 paddr, len; - struct setup_data *data; void *p; ret = kobj_to_setup_data_nr(kobj, &nr); @@ -165,10 +193,27 @@ static ssize_t setup_data_data_read(struct file *fp, if (!data) return -ENOMEM; - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - paddr = ((struct setup_indirect *)data->data)->addr; - len = ((struct setup_indirect *)data->data)->len; + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + 
data->len; + memunmap(data); + data = memremap(paddr, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } else { + /* + * Even though this is technically undefined, return + * the data as though it is a normal setup_data struct. + * This will at least allow it to be inspected. + */ + paddr += sizeof(*data); + len = data->len; + } } else { paddr += sizeof(*data); len = data->len; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a438217cbfac..79e0b8d63ffa 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -462,19 +462,24 @@ static bool pv_tlb_flush_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) && + !boot_cpu_has(X86_FEATURE_MWAIT) && + (num_possible_cpus() != 1)); } static bool pv_ipi_supported(void) { - return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI); + return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) && + (num_possible_cpus() != 1)); } static bool pv_sched_yield_supported(void) { return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) && + !boot_cpu_has(X86_FEATURE_MWAIT) && + (num_possible_cpus() != 1)); } #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) @@ -619,7 +624,7 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask) /* Make sure other vCPUs get a chance to run if they need to. */ for_each_cpu(cpu, mask) { - if (vcpu_is_preempted(cpu)) { + if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) { kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu)); break; } @@ -1024,10 +1029,11 @@ asm( ".global __raw_callee_save___kvm_vcpu_is_preempted;" ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" "__raw_callee_save___kvm_vcpu_is_preempted:" +ASM_ENDBR "movq __per_cpu_offset(,%rdi,8), %rax;" "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" "setne %al;" -"ret;" +ASM_RET ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" ".popsection"); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index a35cbf9107af..c5caa7311bd8 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -239,6 +239,9 @@ static void __init kvmclock_init_mem(void) static int __init kvm_setup_vsyscall_timeinfo(void) { + if (!kvm_para_available() || !kvmclock) + return 0; + kvmclock_init_mem(); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index f5da4a18070a..566bb8e17149 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -27,6 +27,7 @@ #include <asm/kexec-bzimage64.h> #include <asm/setup.h> #include <asm/set_memory.h> +#include <asm/cpu.h> #ifdef CONFIG_ACPI /* @@ -310,6 +311,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); hw_breakpoint_disable(); + cet_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC @@ -325,7 +327,7 @@ void machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page) + PAGE_SIZE; - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + 
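/*
 * Illustrative sketch of the SETUP_INDIRECT handling pattern repeated in
 * e820.c, kdebugfs.c and ksysfs.c above (and in setup.c below): the
 * setup_data header is mapped first to learn data->len, the header-only
 * mapping is dropped, and the range is remapped with sizeof(*data) +
 * data->len so the embedded struct setup_indirect can be read without
 * running past the original mapping.  The struct below is a simplified
 * stand-in for the real boot protocol structures.
 */
#include <stddef.h>

struct sketch_setup_data {
	unsigned long long next;	/* physical address of the next entry */
	unsigned int type;		/* SETUP_INDIRECT selects the indirect form */
	unsigned int len;		/* length of data[] that follows the header */
	unsigned char data[];
};

static size_t sketch_full_mapping_len(const struct sketch_setup_data *hdr)
{
	/* the second mapping must cover both the header and the whole payload */
	return sizeof(*hdr) + hdr->len;
}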
__memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 95fa745e310a..58bafbd19b1d 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL; + *retpolines = NULL, *ibt_endbr = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,8 +271,18 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) + ibt_endbr = s; } + /* + * See alternative_instructions() for the ordering rules between the + * various patching types. + */ + if (para) { + void *pseg = (void *)para->sh_addr; + apply_paravirt(pseg, pseg + para->sh_size); + } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); @@ -282,6 +292,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } + if (ibt_endbr) { + void *iseg = (void *)ibt_endbr->sh_addr; + apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); + } if (locks && text) { void *lseg = (void *)locks->sh_addr; void *tseg = (void *)text->sh_addr; @@ -290,11 +304,6 @@ int module_finalize(const Elf_Ehdr *hdr, tseg, tseg + text->sh_size); } - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } - /* make jump label nops */ jump_label_apply_nops(me); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4420499f7bb4..7ca2d46c08cc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,6 +41,7 @@ extern void _paravirt_nop(void); asm (".pushsection .entry.text, \"ax\"\n" ".global _paravirt_nop\n" "_paravirt_nop:\n\t" + ASM_ENDBR ASM_RET ".size _paravirt_nop, . - _paravirt_nop\n\t" ".type _paravirt_nop, @function\n\t" @@ -50,6 +51,7 @@ asm (".pushsection .entry.text, \"ax\"\n" asm (".pushsection .entry.text, \"ax\"\n" ".global paravirt_ret0\n" "paravirt_ret0:\n\t" + ASM_ENDBR "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" ASM_RET ".size paravirt_ret0, . 
- paravirt_ret0\n\t" @@ -69,29 +71,12 @@ noinstr void paravirt_BUG(void) BUG(); } -struct branch { - unsigned char opcode; - u32 delta; -} __attribute__((packed)); - static unsigned paravirt_patch_call(void *insn_buff, const void *target, unsigned long addr, unsigned len) { - const int call_len = 5; - struct branch *b = insn_buff; - unsigned long delta = (unsigned long)target - (addr+call_len); - - if (len < call_len) { - pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr); - /* Kernel might not be viable if patching fails, bail out: */ - BUG_ON(1); - } - - b->opcode = 0xe8; /* call */ - b->delta = delta; - BUILD_BUG_ON(sizeof(*b) != call_len); - - return call_len; + __text_gen_insn(insn_buff, CALL_INSN_OPCODE, + (void *)addr, target, CALL_INSN_SIZE); + return CALL_INSN_SIZE; } #ifdef CONFIG_PARAVIRT_XXL @@ -149,8 +134,6 @@ void paravirt_set_sched_clock(u64 (*func)(void)) } /* These are in entry.S */ -extern void native_iret(void); - static struct resource reserve_ioports = { .start = 0, .end = IO_SPACE_LIMIT, @@ -414,8 +397,6 @@ struct paravirt_patch_template pv_ops = { #ifdef CONFIG_PARAVIRT_XXL NOKPROBE_SYMBOL(native_load_idt); - -void (*paravirt_iret)(void) = native_iret; #endif EXPORT_SYMBOL(pv_ops); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e131d71b3cae..b370767f5b19 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -747,7 +747,7 @@ bool xen_set_default_idle(void) } #endif -void stop_this_cpu(void *dummy) +void __noreturn stop_this_cpu(void *dummy) { local_irq_disable(); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3402edec236c..e459253649be 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -681,7 +681,7 @@ void set_personality_64bit(void) static void __set_personality_x32(void) { -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (current->mm) current->mm->context.flags = 0; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 6d2244c94799..8d2f2f995539 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = { }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(long), + .n = sizeof(struct fxregs_state) / sizeof(long), .size = sizeof(long), .align = sizeof(long), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, @@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = { }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, - .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), + .n = sizeof(struct fxregs_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 399f075ccdc4..c1d8626c53b6 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -42,6 +42,7 @@ .code64 SYM_CODE_START_NOALIGN(relocate_kernel) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * %rdi indirection_page * %rsi page_list @@ -115,6 +116,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) pushq %rdx /* + * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP + * below. 
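/*
 * Illustrative sketch of the direct-call encoding that the rewritten
 * paravirt_patch_call() above asks __text_gen_insn() to produce: opcode
 * 0xE8 followed by a 32-bit displacement measured from the end of the
 * 5-byte instruction.  sketch_emit_call() is a stand-in, not kernel code.
 */
#include <stdint.h>
#include <string.h>

#define SKETCH_CALL_INSN_SIZE 5

static void sketch_emit_call(uint8_t *buf, unsigned long addr,
			     unsigned long target)
{
	/* rel32 is relative to the first byte after the CALL instruction */
	int32_t rel = (int32_t)(target - (addr + SKETCH_CALL_INSN_SIZE));

	buf[0] = 0xe8;			/* CALL rel32 */
	memcpy(&buf[1], &rel, sizeof(rel));
}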
+ */ + movq %cr4, %rax + andq $~(X86_CR4_CET), %rax + movq %rax, %cr4 + + /* * Set cr0 to a known state: * - Paging enabled * - Alignment check disabled @@ -215,6 +224,7 @@ SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // RET target, above movq RSP(%r8), %rsp movq CR4(%r8), %rax movq %rax, %cr4 diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 9ae64f9af956..9b9fb7882c20 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/dmi.h> #include <linux/ioport.h> #include <asm/e820/api.h> @@ -24,31 +23,11 @@ static void resource_clip(struct resource *res, resource_size_t start, res->start = end + 1; } -/* - * Some BIOS-es contain a bug where they add addresses which map to - * system RAM in the PCI host bridge window returned by the ACPI _CRS - * method, see commit 4dc2287c1805 ("x86: avoid E820 regions when - * allocating address space"). To avoid this Linux by default excludes - * E820 reservations when allocating addresses since 2010. - * In 2019 some systems have shown-up with E820 reservations which cover - * the entire _CRS returned PCI host bridge window, causing all attempts - * to assign memory to PCI BARs to fail if Linux uses E820 reservations. - * - * Ideally Linux would fully stop using E820 reservations, but then - * the old systems this was added for will regress. - * Instead keep the old behavior for old systems, while ignoring the - * E820 reservations for any systems from now on. - */ static void remove_e820_regions(struct resource *avail) { - int i, year = dmi_get_bios_year(); + int i; struct e820_entry *entry; - if (year >= 2018) - return; - - pr_info_once("PCI: Removing E820 reservations from host bridge windows\n"); - for (i = 0; i < e820_table->nr_entries; i++) { entry = &e820_table->entries[i]; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f7a132eb794d..90d7e1788c91 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -369,21 +369,41 @@ static void __init parse_setup_data(void) static void __init memblock_x86_reserve_range_setup_data(void) { + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; + u32 len; pa_data = boot_params.hdr.setup_data; while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); + if (!data) { + pr_warn("setup: failed to memremap setup_data entry\n"); + return; + } + + len = sizeof(*data); + pa_next = data->next; + memblock_reserve(pa_data, sizeof(*data) + data->len); - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) - memblock_reserve(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len); + if (data->type == SETUP_INDIRECT) { + len += data->len; + early_memunmap(data, sizeof(*data)); + data = early_memremap(pa_data, len); + if (!data) { + pr_warn("setup: failed to memremap indirect setup_data\n"); + return; + } - pa_data = data->next; - early_memunmap(data, sizeof(*data)); + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) + memblock_reserve(indirect->addr, indirect->len); + } + + pa_data = pa_next; + early_memunmap(data, len); } } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c9d566dcf89a..1563fb995005 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -39,6 +39,7 @@ #include <linux/io.h> #include <linux/hardirq.h> 
#include <linux/atomic.h> +#include <linux/ioasid.h> #include <asm/stacktrace.h> #include <asm/processor.h> @@ -209,6 +210,81 @@ DEFINE_IDTENTRY(exc_overflow) do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL); } +#ifdef CONFIG_X86_KERNEL_IBT + +static __ro_after_init bool ibt_fatal = true; + +extern void ibt_selftest_ip(void); /* code label defined in asm below */ + +enum cp_error_code { + CP_EC = (1 << 15) - 1, + + CP_RET = 1, + CP_IRET = 2, + CP_ENDBR = 3, + CP_RSTRORSSP = 4, + CP_SETSSBSY = 5, + + CP_ENCL = 1 << 15, +}; + +DEFINE_IDTENTRY_ERRORCODE(exc_control_protection) +{ + if (!cpu_feature_enabled(X86_FEATURE_IBT)) { + pr_err("Unexpected #CP\n"); + BUG(); + } + + if (WARN_ON_ONCE(user_mode(regs) || (error_code & CP_EC) != CP_ENDBR)) + return; + + if (unlikely(regs->ip == (unsigned long)&ibt_selftest_ip)) { + regs->ax = 0; + return; + } + + pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs)); + if (!ibt_fatal) { + printk(KERN_DEFAULT CUT_HERE); + __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL); + return; + } + BUG(); +} + +/* Must be noinline to ensure uniqueness of ibt_selftest_ip. */ +noinline bool ibt_selftest(void) +{ + unsigned long ret; + + asm (" lea ibt_selftest_ip(%%rip), %%rax\n\t" + ANNOTATE_RETPOLINE_SAFE + " jmp *%%rax\n\t" + "ibt_selftest_ip:\n\t" + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + " nop\n\t" + + : "=a" (ret) : : "memory"); + + return !ret; +} + +static int __init ibt_setup(char *str) +{ + if (!strcmp(str, "off")) + setup_clear_cpu_cap(X86_FEATURE_IBT); + + if (!strcmp(str, "warn")) + ibt_fatal = false; + + return 1; +} + +__setup("ibt=", ibt_setup); + +#endif /* CONFIG_X86_KERNEL_IBT */ + #ifdef CONFIG_X86_F00F_BUG void handle_invalid_op(struct pt_regs *regs) #else @@ -559,6 +635,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs) return true; } +/* + * The unprivileged ENQCMD instruction generates #GPs if the + * IA32_PASID MSR has not been populated. If possible, populate + * the MSR from a PASID previously allocated to the mm. + */ +static bool try_fixup_enqcmd_gp(void) +{ +#ifdef CONFIG_IOMMU_SVA + u32 pasid; + + /* + * MSR_IA32_PASID is managed using XSAVE. Directly + * writing to the MSR is only possible when fpregs + * are valid and the fpstate is not. This is + * guaranteed when handling a userspace exception + * in *before* interrupts are re-enabled. + */ + lockdep_assert_irqs_disabled(); + + /* + * Hardware without ENQCMD will not generate + * #GPs that can be fixed up here. + */ + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + return false; + + pasid = current->mm->pasid; + + /* + * If the mm has not been allocated a + * PASID, the #GP can not be fixed up. + */ + if (!pasid_valid(pasid)) + return false; + + /* + * Did this thread already have its PASID activated? + * If so, the #GP must be from something else. 
+ */ + if (current->pasid_activated) + return false; + + wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + current->pasid_activated = 1; + + return true; +#else + return false; +#endif +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -567,6 +694,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) unsigned long gp_addr; int ret; + if (user_mode(regs) && try_fixup_enqcmd_gp()) + return; + cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { @@ -659,6 +789,7 @@ static bool do_int3(struct pt_regs *regs) return res == NOTIFY_STOP; } +NOKPROBE_SYMBOL(do_int3); static void do_int3_user(struct pt_regs *regs) { diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 27f830345b6f..7fda7f27e762 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -285,6 +285,15 @@ SECTIONS } #endif +#ifdef CONFIG_X86_KERNEL_IBT + . = ALIGN(8); + .ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) { + __ibt_endbr_seal = .; + *(.ibt_endbr_seal) + __ibt_endbr_seal_end = .; + } +#endif + /* * struct alt_inst entries. From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities" diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 28be02adc669..b8f8d268d058 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -282,6 +282,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_cpuid_entry2 *best; + u64 guest_supported_xcr0; best = kvm_find_cpuid_entry(vcpu, 1, 0); if (best && apic) { @@ -293,9 +294,11 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - vcpu->arch.guest_supported_xcr0 = + guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); + vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0; + kvm_update_pv_runtime(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); @@ -554,12 +557,13 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_mask(CPUID_7_0_EBX, - F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | - F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) | - F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) | - F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | - F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/ - ); + F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | + F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) | + F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) | + F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) | + F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) | + F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) | + F(AVX512VL)); kvm_cpu_cap_mask(CPUID_7_ECX, F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5719d8cfdbd9..08c4e9c1a382 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -189,7 +189,7 @@ #define X16(x...) 
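/*
 * Illustrative summary of the ENQCMD #GP fixup added above: a #GP from
 * user space can be resolved silently by loading MSR_IA32_PASID from the
 * mm, but only when the CPU has ENQCMD, the mm already owns a valid
 * PASID, and this thread has not had its PASID activated yet.  The
 * booleans below are stand-ins for the real feature and state checks.
 */
static int sketch_can_fixup_enqcmd_gp(int cpu_has_enqcmd, int pasid_valid,
				      int pasid_already_activated)
{
	if (!cpu_has_enqcmd)
		return 0;	/* no ENQCMD, the #GP came from something else */
	if (!pasid_valid)
		return 0;	/* the mm was never allocated a PASID */
	if (pasid_already_activated)
		return 0;	/* MSR already populated; cannot be the cause */
	return 1;		/* write MSR_IA32_PASID and retry the instruction */
}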
X8(x), X8(x) #define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE 8 +#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) struct opcode { u64 flags; @@ -311,7 +311,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ - name ":\n\t" + name ":\n\t" \ + ASM_ENDBR #define FOP_FUNC(name) \ __FOP_FUNC(#name) @@ -433,6 +434,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); ".align 4 \n\t" \ ".type " #op ", @function \n\t" \ #op ": \n\t" \ + ASM_ENDBR \ #op " %al \n\t" \ __FOP_RET(#op) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4662469240bc..9322e6340a74 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1096,14 +1096,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic->regs + APIC_TMR); } - if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) { - kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); - } else { - trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector); - } + static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode, + trig_mode, vector); break; case APIC_DM_REMRD: @@ -2312,7 +2306,12 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) apic->irr_pending = true; apic->isr_count = 1; } else { - apic->irr_pending = (apic_search_irr(apic) != -1); + /* + * Don't clear irr_pending, searching the IRR can race with + * updates from the CPU as APICv is still active from hardware's + * perspective. The flag will be cleared as appropriate when + * KVM injects the interrupt. + */ apic->isr_count = count_vectors(apic->regs + APIC_ISR); } } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 593093b52395..5628d0ba637e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3565,7 +3565,7 @@ set_root_pgd: out_unlock: write_unlock(&vcpu->kvm->mmu_lock); - return 0; + return r; } static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) @@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } +static u32 alloc_apf_token(struct kvm_vcpu *vcpu) +{ + /* make sure the token value is not 0 */ + u32 id = vcpu->arch.apf.id; + + if (id << 12 == 0) + vcpu->arch.apf.id = 1; + + return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; +} + static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, gfn_t gfn) { struct kvm_arch_async_pf arch; - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; + arch.token = alloc_apf_token(vcpu); arch.gfn = gfn; arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index f614f95acc6b..b1a02993782b 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -95,7 +95,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event, } static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, - unsigned config, bool exclude_user, + u64 config, bool exclude_user, bool exclude_kernel, bool intr, bool in_tx, bool in_tx_cp) { @@ -181,7 +181,8 @@ static int cmp_u64(const void *a, const void *b) void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { - unsigned config, type = PERF_TYPE_RAW; + u64 config; + u32 type = PERF_TYPE_RAW; struct kvm *kvm = pmc->vcpu->kvm; struct kvm_pmu_event_filter *filter; bool allow_event = true; @@ -220,7 +221,7 @@ void 
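/*
 * Illustrative sketch of the async page fault token format used by
 * alloc_apf_token() above: the vCPU id sits in the low bits and a
 * per-vCPU counter is shifted into the upper bits, with the counter
 * bumped past any value whose shifted form would make the whole token
 * zero (zero means "no token").  Plain integers stand in for the vCPU
 * state here.
 */
#include <stdint.h>

static uint32_t sketch_alloc_apf_token(uint32_t *counter, uint32_t vcpu_id)
{
	/* skip a counter value that would produce an all-zero token */
	if ((*counter << 12) == 0)
		*counter = 1;

	return ((*counter)++ << 12) | vcpu_id;
}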
reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) } if (type == PERF_TYPE_RAW) - config = eventsel & X86_RAW_EVENT_MASK; + config = eventsel & AMD64_RAW_EVENT_MASK; if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) return; diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 90364d02f22a..fb3e20791338 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -27,20 +27,6 @@ #include "irq.h" #include "svm.h" -#define SVM_AVIC_DOORBELL 0xc001011b - -#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) - -/* - * 0xff is broadcast, so the max index allowed for physical APIC ID - * table is 0xfe. APIC IDs above 0xff are reserved. - */ -#define AVIC_MAX_PHYSICAL_ID_COUNT 255 - -#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 -#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 -#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF - /* AVIC GATAG is encoded using VM and VCPU IDs */ #define AVIC_VCPU_ID_BITS 8 #define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1) @@ -73,12 +59,6 @@ struct amd_svm_iommu_ir { void *data; /* Storing pointer to struct amd_ir_data */ }; -enum avic_ipi_failure_cause { - AVIC_IPI_FAILURE_INVALID_INT_TYPE, - AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, - AVIC_IPI_FAILURE_INVALID_TARGET, - AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, -}; /* Note: * This function is called from IOMMU driver to notify @@ -289,6 +269,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return 0; } +void avic_ring_doorbell(struct kvm_vcpu *vcpu) +{ + /* + * Note, the vCPU could get migrated to a different pCPU at any point, + * which could result in signalling the wrong/previous pCPU. But if + * that happens the vCPU is guaranteed to do a VMRUN (after being + * migrated) and thus will process pending interrupts, i.e. a doorbell + * is not needed (and the spurious one is harmless). + */ + int cpu = READ_ONCE(vcpu->cpu); + + if (cpu != get_cpu()) + wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu)); + put_cpu(); +} + static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, u32 icrl, u32 icrh) { @@ -304,8 +300,13 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, kvm_for_each_vcpu(i, vcpu, kvm) { if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK, GET_APIC_DEST_FIELD(icrh), - icrl & APIC_DEST_MASK)) - kvm_vcpu_wake_up(vcpu); + icrl & APIC_DEST_MASK)) { + vcpu->arch.apic->irr_pending = true; + svm_complete_interrupt_delivery(vcpu, + icrl & APIC_MODE_MASK, + icrl & APIC_INT_LEVELTRIG, + icrl & APIC_VECTOR_MASK); + } } } @@ -345,8 +346,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh); break; case AVIC_IPI_FAILURE_INVALID_TARGET: - WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n", - index, vcpu->vcpu_id, icrh, icrl); break; case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); @@ -669,52 +668,6 @@ void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) -{ - if (!vcpu->arch.apicv_active) - return -1; - - kvm_lapic_set_irr(vec, vcpu->arch.apic); - - /* - * Pairs with the smp_mb_*() after setting vcpu->guest_mode in - * vcpu_enter_guest() to ensure the write to the vIRR is ordered before - * the read of guest_mode, which guarantees that either VMRUN will see - * and process the new vIRR entry, or that the below code will signal - * the doorbell if the vCPU is already running in the guest. 
- */ - smp_mb__after_atomic(); - - /* - * Signal the doorbell to tell hardware to inject the IRQ if the vCPU - * is in the guest. If the vCPU is not in the guest, hardware will - * automatically process AVIC interrupts at VMRUN. - */ - if (vcpu->mode == IN_GUEST_MODE) { - int cpu = READ_ONCE(vcpu->cpu); - - /* - * Note, the vCPU could get migrated to a different pCPU at any - * point, which could result in signalling the wrong/previous - * pCPU. But if that happens the vCPU is guaranteed to do a - * VMRUN (after being migrated) and thus will process pending - * interrupts, i.e. a doorbell is not needed (and the spurious - * one is harmless). - */ - if (cpu != get_cpu()) - wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu)); - put_cpu(); - } else { - /* - * Wake the vCPU if it was blocking. KVM will then detect the - * pending IRQ when checking if the vCPU has a wake event. - */ - kvm_vcpu_wake_up(vcpu); - } - - return 0; -} - bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) { return false; diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1218b5a342fc..39d280e7e80e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1457,18 +1457,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, !__nested_vmcb_check_save(vcpu, &save_cached)) goto out_free; - /* - * While the nested guest CR3 is already checked and set by - * KVM_SET_SREGS, it was set when nested state was yet loaded, - * thus MMU might not be initialized correctly. - * Set it again to fix this. - */ - - ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3, - nested_npt_enabled(svm), false); - if (WARN_ON_ONCE(ret)) - goto out_free; - /* * All checks done, we can enter guest mode. Userspace provides @@ -1494,6 +1482,20 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm_switch_vmcb(svm, &svm->nested.vmcb02); nested_vmcb02_prepare_control(svm); + + /* + * While the nested guest CR3 is already checked and set by + * KVM_SET_SREGS, it was set when nested state was yet loaded, + * thus MMU might not be initialized correctly. + * Set it again to fix this. 
+ */ + + ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3, + nested_npt_enabled(svm), false); + if (WARN_ON_ONCE(ret)) + goto out_free; + + kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); ret = 0; out_free: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6d97629655e3..fd3a00c892c7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1585,6 +1585,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); u64 hcr0 = cr0; + bool old_paging = is_paging(vcpu); #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { @@ -1601,8 +1602,11 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif vcpu->arch.cr0 = cr0; - if (!npt_enabled) + if (!npt_enabled) { hcr0 |= X86_CR0_PG | X86_CR0_WP; + if (old_paging != is_paging(vcpu)) + svm_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } /* * re-enable caching here because the QEMU bios @@ -1646,8 +1650,12 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) svm_flush_tlb(vcpu); vcpu->arch.cr4 = cr4; - if (!npt_enabled) + if (!npt_enabled) { cr4 |= X86_CR4_PAE; + + if (!is_paging(vcpu)) + cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); + } cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); @@ -2685,8 +2693,23 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; switch (ecx) { case MSR_AMD64_TSC_RATIO: - if (!msr->host_initiated && !svm->tsc_scaling_enabled) - return 1; + + if (!svm->tsc_scaling_enabled) { + + if (!msr->host_initiated) + return 1; + /* + * In case TSC scaling is not enabled, always + * leave this MSR at the default value. + * + * Due to bug in qemu 6.2.0, it would try to set + * this msr to 0 if tsc scaling is not enabled. + * Ignore this value as well. + */ + if (data != 0 && data != svm->tsc_ratio_msr) + return 1; + break; + } if (data & TSC_RATIO_RSVD) return 1; @@ -3291,6 +3314,55 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; } +void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, + int trig_mode, int vector) +{ + /* + * vcpu->arch.apicv_active must be read after vcpu->mode. + * Pairs with smp_store_release in vcpu_enter_guest. + */ + bool in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE); + + if (!READ_ONCE(vcpu->arch.apicv_active)) { + /* Process the interrupt via inject_pending_event */ + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + return; + } + + trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector); + if (in_guest_mode) { + /* + * Signal the doorbell to tell hardware to inject the IRQ. If + * the vCPU exits the guest before the doorbell chimes, hardware + * will automatically process AVIC interrupts at the next VMRUN. + */ + avic_ring_doorbell(vcpu); + } else { + /* + * Wake the vCPU if it was blocking. KVM will then detect the + * pending IRQ when checking if the vCPU has a wake event. + */ + kvm_vcpu_wake_up(vcpu); + } +} + +static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector) +{ + kvm_lapic_set_irr(vector, apic); + + /* + * Pairs with the smp_mb_*() after setting vcpu->guest_mode in + * vcpu_enter_guest() to ensure the write to the vIRR is ordered before + * the read of guest_mode. 
This guarantees that either VMRUN will see + * and process the new vIRR entry, or that svm_complete_interrupt_delivery + * will signal the doorbell if the CPU has already entered the guest. + */ + smp_mb__after_atomic(); + svm_complete_interrupt_delivery(apic->vcpu, delivery_mode, trig_mode, vector); +} + static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3338,11 +3410,13 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) if (svm->nested.nested_run_pending) return -EBUSY; + if (svm_nmi_blocked(vcpu)) + return 0; + /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) return -EBUSY; - - return !svm_nmi_blocked(vcpu); + return 1; } static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) @@ -3394,9 +3468,13 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) { struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) return -EBUSY; + if (svm_interrupt_blocked(vcpu)) + return 0; + /* * An IRQ must not be injected into L2 if it's supposed to VM-Exit, * e.g. if the IRQ arrived asynchronously after checking nested events. @@ -3404,7 +3482,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm)) return -EBUSY; - return !svm_interrupt_blocked(vcpu); + return 1; } static void svm_enable_irq_window(struct kvm_vcpu *vcpu) @@ -3615,7 +3693,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); unsigned long vmcb_pa = svm->current_vmcb->pa; - kvm_guest_enter_irqoff(); + guest_state_enter_irqoff(); if (sev_es_guest(vcpu->kvm)) { __svm_sev_es_vcpu_run(vmcb_pa); @@ -3635,7 +3713,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) vmload(__sme_page_pa(sd->save_area)); } - kvm_guest_exit_irqoff(); + guest_state_exit_irqoff(); } static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) @@ -4135,11 +4213,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) if (svm->nested.nested_run_pending) return -EBUSY; + if (svm_smi_blocked(vcpu)) + return 0; + /* An SMI must not be injected into L2 if it's supposed to VM-Exit. 
*/ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm)) return -EBUSY; - return !svm_smi_blocked(vcpu); + return 1; } static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) @@ -4233,11 +4314,18 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) * Enter the nested guest now */ + vmcb_mark_all_dirty(svm->vmcb01.ptr); + vmcb12 = map.hva; nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); + if (ret) + goto unmap_save; + + svm->nested.nested_run_pending = 1; + unmap_save: kvm_vcpu_unmap(vcpu, &map_save, true); unmap_map: @@ -4545,7 +4633,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .pmu_ops = &amd_pmu_ops, .nested_ops = &svm_nested_ops, - .deliver_posted_interrupt = svm_deliver_avic_intr, + .deliver_interrupt = svm_deliver_interrupt, .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, .setup_mce = svm_setup_mce, @@ -4622,6 +4710,7 @@ static __init void svm_set_cpu_caps(void) /* CPUID 0x80000001 and 0x8000000A (SVM features) */ if (nested) { kvm_cpu_cap_set(X86_FEATURE_SVM); + kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN); if (nrips) kvm_cpu_cap_set(X86_FEATURE_NRIPS); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 73525353e424..fa98d6844728 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -489,6 +489,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value); int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code); void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, int read, int write); +void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, + int trig_mode, int vec); /* nested.c */ @@ -556,17 +558,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops; /* avic.c */ -#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) -#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 -#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) - -#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) -#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) -#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) -#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) - -#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL - int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); @@ -583,12 +574,12 @@ bool svm_check_apicv_inhibit_reasons(ulong bit); void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); -int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec); bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu); int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void avic_vcpu_blocking(struct kvm_vcpu *vcpu); void avic_vcpu_unblocking(struct kvm_vcpu *vcpu); +void avic_ring_doorbell(struct kvm_vcpu *vcpu); /* sev.c */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ba34e94049c7..dc822a1d403d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -246,8 +246,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, src = &prev->host_state; dest = &vmx->loaded_vmcs->host_state; - vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel, - src->fs_base, src->gs_base); + 
vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base); dest->ldt_sel = src->ldt_sel; #ifdef CONFIG_X86_64 dest->ds_sel = src->ds_sel; @@ -3056,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; bool vm_fail; if (!nested_early_check) @@ -3079,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) */ vmcs_writel(GUEST_RFLAGS, 0); + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aca3ae2a02f3..b730d799c26e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1080,14 +1080,9 @@ static void pt_guest_exit(struct vcpu_vmx *vmx) wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); } -void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3, - u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base) +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base) { - if (unlikely(cr3 != host->cr3)) { - vmcs_writel(HOST_CR3, cr3); - host->cr3 = cr3; - } if (unlikely(fs_sel != host->fs_sel)) { if (!(fs_sel & 7)) vmcs_write16(HOST_FS_SELECTOR, fs_sel); @@ -1182,9 +1177,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) gs_base = segment_base(gs_sel); #endif - vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(), - fs_sel, gs_sel, fs_base, gs_base); - + vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); vmx->guest_state_loaded = true; } @@ -4041,6 +4034,21 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) return 0; } +static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector) +{ + struct kvm_vcpu *vcpu = apic->vcpu; + + if (vmx_deliver_posted_interrupt(vcpu, vector)) { + kvm_lapic_set_irr(vector, apic); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } else { + trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector); + } +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -6754,7 +6762,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - kvm_guest_enter_irqoff(); + guest_state_enter_irqoff(); /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) @@ -6770,13 +6778,13 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = native_read_cr2(); - kvm_guest_exit_irqoff(); + guest_state_exit_irqoff(); } static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr4; + unsigned long cr3, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -6819,6 +6827,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + /* + * Refresh vmcs.HOST_CR3 if necessary. 
This must be done immediately + * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time + * it switches back to the current->mm, which can occur in KVM context + * when switching to a temporary mm to patch kernel code, e.g. if KVM + * toggles a static key while handling a VM-Exit. + */ + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + cr4 = cr4_read_shadow(); if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); @@ -7644,6 +7665,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (ret) return ret; + vmx->nested.nested_run_pending = 1; vmx->nested.smm.guest_mode = false; } return 0; @@ -7768,7 +7790,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .hwapic_isr_update = vmx_hwapic_isr_update, .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, .sync_pir_to_irr = vmx_sync_pir_to_irr, - .deliver_posted_interrupt = vmx_deliver_posted_interrupt, + .deliver_interrupt = vmx_deliver_interrupt, .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt, .set_tss_addr = vmx_set_tss_addr, diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 7f2c82e7f38f..9c6bfcd84008 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -374,9 +374,8 @@ int allocate_vpid(void); void free_vpid(int vpid); void vmx_set_constant_host_state(struct vcpu_vmx *vmx); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); -void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3, - u16 fs_sel, u16 gs_sel, - unsigned long fs_base, unsigned long gs_base); +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base); int vmx_get_cpl(struct kvm_vcpu *vcpu); bool vmx_emulation_required(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 74b53a16f38a..eb4029660bd9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -90,6 +90,8 @@ u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); +#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) + #define emul_to_vcpu(ctxt) \ ((struct kvm_vcpu *)(ctxt)->vcpu) @@ -982,6 +984,18 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); +static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.guest_fpu.fpstate->user_xfeatures; +} + +#ifdef CONFIG_X86_64 +static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) +{ + return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC; +} +#endif + static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { u64 xcr0 = xcr; @@ -1001,7 +1015,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) * saving. However, xcr0 bit 0 is always set, even if the * emulated CPU does not support XSAVE (see kvm_vcpu_reset()). 
*/ - valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; + valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP; if (xcr0 & ~valid_bits) return 1; @@ -2349,10 +2363,12 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) return tsc; } +#ifdef CONFIG_X86_64 static inline int gtod_is_based_on_tsc(int mode) { return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK; } +#endif static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { @@ -3704,8 +3720,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) return 1; - if (data & ~(XFEATURE_MASK_USER_DYNAMIC & - vcpu->arch.guest_supported_xcr0)) + if (data & ~kvm_guest_supported_xfd(vcpu)) return 1; fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data); @@ -3715,8 +3730,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !guest_cpuid_has(vcpu, X86_FEATURE_XFD)) return 1; - if (data & ~(XFEATURE_MASK_USER_DYNAMIC & - vcpu->arch.guest_supported_xcr0)) + if (data & ~kvm_guest_supported_xfd(vcpu)) return 1; vcpu->arch.guest_fpu.xfd_err = data; @@ -4231,6 +4245,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_EXIT_ON_EMULATION_FAILURE: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_SYS_ATTRIBUTES: + case KVM_CAP_ENABLE_CAP: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: @@ -4340,7 +4355,7 @@ static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) void __user *uaddr = (void __user*)(unsigned long)attr->addr; if ((u64)(unsigned long)uaddr != attr->addr) - return ERR_PTR(-EFAULT); + return ERR_PTR_USR(-EFAULT); return uaddr; } @@ -8940,6 +8955,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) return -KVM_EOPNOTSUPP; + /* + * When tsc is in permanent catchup mode guests won't be able to use + * pvclock_read_retry loop to get consistent view of pvclock + */ + if (vcpu->arch.tsc_always_catchup) + return -KVM_EOPNOTSUPP; + if (!kvm_get_walltime_and_clockread(&ts, &cycle)) return -KVM_EOPNOTSUPP; @@ -9158,6 +9180,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) likely(!pic_in_kernel(vcpu->kvm)); } +/* Called within kvm->srcu read side. */ static void post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; @@ -9166,16 +9189,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); - /* - * The call to kvm_ready_for_interrupt_injection() may end up in - * kvm_xen_has_interrupt() which may require the srcu lock to be - * held, to protect against changes in the vcpu_info address. - */ - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || kvm_vcpu_ready_for_interrupt_injection(vcpu); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (is_smm(vcpu)) kvm_run->flags |= KVM_RUN_X86_SMM; @@ -9793,6 +9809,7 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); /* + * Called within kvm->srcu read side. * Returns 1 to let vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the * userspace. @@ -9981,7 +9998,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * result in virtual interrupt delivery. */ local_irq_disable(); - vcpu->mode = IN_GUEST_MODE; + + /* Store vcpu->apicv_active before vcpu->mode. 
*/ + smp_store_release(&vcpu->mode, IN_GUEST_MODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); @@ -10041,6 +10060,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } + guest_timing_enter_irqoff(); + for (;;) { /* * Assert that vCPU vs. VM APICv state is consistent. An APICv @@ -10125,7 +10146,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * of accounting via context tracking, but the loss of accuracy is * acceptable for all known use cases. */ - vtime_account_guest_exit(); + guest_timing_exit_irqoff(); if (lapic_in_kernel(vcpu)) { s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta; @@ -10167,6 +10188,7 @@ out: return r; } +/* Called within kvm->srcu read side. */ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) { bool hv_timer; @@ -10226,12 +10248,12 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) !vcpu->arch.apf.halted); } +/* Called within kvm->srcu read side. */ static int vcpu_run(struct kvm_vcpu *vcpu) { int r; struct kvm *kvm = vcpu->kvm; - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); vcpu->arch.l1tf_flush_l1d = true; for (;;) { @@ -10259,14 +10281,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (__xfer_to_guest_mode_work_pending()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); r = xfer_to_guest_mode_handle_work(vcpu); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); if (r) return r; - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - return r; } @@ -10372,6 +10392,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; + struct kvm *kvm = vcpu->kvm; int r; vcpu_load(vcpu); @@ -10379,6 +10400,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_run->flags = 0; kvm_load_guest_fpu(vcpu); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { r = -EINTR; @@ -10389,7 +10411,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * use before KVM has ever run the vCPU. */ WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu)); + + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + if (kvm_apic_accept_events(vcpu) < 0) { r = 0; goto out; @@ -10449,8 +10475,9 @@ out: if (kvm_run->kvm_valid_regs) store_regs(vcpu); post_kvm_run_save(vcpu); - kvm_sigset_deactivate(vcpu); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_sigset_deactivate(vcpu); vcpu_put(vcpu); return r; } @@ -11639,8 +11666,6 @@ void kvm_arch_sync_events(struct kvm *kvm) kvm_free_pit(kvm); } -#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) - /** * __x86_set_memory_region: Setup KVM internal memory slot * diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 635b75f9e145..767ec7f99516 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -10,51 +10,6 @@ void kvm_spurious_fault(void); -static __always_inline void kvm_guest_enter_irqoff(void) -{ - /* - * VMENTER enables interrupts (host state), but the kernel state is - * interrupts disabled when this is invoked. Also tell RCU about - * it. This is the same logic as for exit_to_user_mode(). - * - * This ensures that e.g. latency analysis on the host observes - * guest mode as interrupt enabled. - * - * guest_enter_irqoff() informs context tracking about the - * transition to guest mode and if enabled adjusts RCU state - * accordingly. 
- */ - instrumentation_begin(); - trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); - instrumentation_end(); - - guest_enter_irqoff(); - lockdep_hardirqs_on(CALLER_ADDR0); -} - -static __always_inline void kvm_guest_exit_irqoff(void) -{ - /* - * VMEXIT disables interrupts (host state), but tracing and lockdep - * have them in state 'on' as recorded before entering guest mode. - * Same as enter_from_user_mode(). - * - * context_tracking_guest_exit() restores host context and reinstates - * RCU if enabled and required. - * - * This needs to be done immediately after VM-Exit, before any code - * that might contain tracepoints or call out to the greater world, - * e.g. before x86_spec_ctrl_restore_host(). - */ - lockdep_hardirqs_off(CALLER_ADDR0); - context_tracking_guest_exit(); - - instrumentation_begin(); - trace_hardirqs_off_finish(); - instrumentation_end(); -} - #define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \ ({ \ bool failed = (consistency_check); \ diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index bad57535fad0..74be1fda58e3 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -133,32 +133,57 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) { struct kvm_vcpu_xen *vx = &v->arch.xen; + struct gfn_to_hva_cache *ghc = &vx->runstate_cache; + struct kvm_memslots *slots = kvm_memslots(v->kvm); + bool atomic = (state == RUNSTATE_runnable); uint64_t state_entry_time; - unsigned int offset; + int __user *user_state; + uint64_t __user *user_times; kvm_xen_update_runstate(v, state); if (!vx->runstate_set) return; - BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); + if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) && + kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len)) + return; + + /* We made sure it fits in a single page */ + BUG_ON(!ghc->memslot); + + if (atomic) + pagefault_disable(); - offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time); -#ifdef CONFIG_X86_64 /* - * The only difference is alignment of uint64_t in 32-bit. - * So the first field 'state' is accessed directly using - * offsetof() (where its offset happens to be zero), while the - * remaining fields which are all uint64_t, start at 'offset' - * which we tweak here by adding 4. + * The only difference between 32-bit and 64-bit versions of the + * runstate struct is the alignment of uint64_t in 32-bit, which + * means that the 64-bit version has an additional 4 bytes of + * padding after the first field 'state'. + * + * So we use 'int __user *user_state' to point to the state field, + * and 'uint64_t __user *user_times' for runstate_entry_time. So + * the actual array of time[] in each state starts at user_times[1].
*/ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); + BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); + user_state = (int __user *)ghc->hva; + + BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); + + user_times = (uint64_t __user *)(ghc->hva + + offsetof(struct compat_vcpu_runstate_info, + state_entry_time)); +#ifdef CONFIG_X86_64 BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != offsetof(struct compat_vcpu_runstate_info, time) + 4); if (v->kvm->arch.xen.long_mode) - offset = offsetof(struct vcpu_runstate_info, state_entry_time); + user_times = (uint64_t __user *)(ghc->hva + + offsetof(struct vcpu_runstate_info, + state_entry_time)); #endif /* * First write the updated state_entry_time at the appropriate @@ -172,10 +197,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != sizeof(state_entry_time)); - if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, - &state_entry_time, offset, - sizeof(state_entry_time))) - return; + if (__put_user(state_entry_time, user_times)) + goto out; smp_wmb(); /* @@ -189,11 +212,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != sizeof(vx->current_runstate)); - if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, - &vx->current_runstate, - offsetof(struct vcpu_runstate_info, state), - sizeof(vx->current_runstate))) - return; + if (__put_user(vx->current_runstate, user_state)) + goto out; /* * Write the actual runstate times immediately after the @@ -208,24 +228,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof(vx->runstate_times)); - if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, - &vx->runstate_times[0], - offset + sizeof(u64), - sizeof(vx->runstate_times))) - return; - + if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times))) + goto out; smp_wmb(); /* * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's * runstate_entry_time field. 
*/ - state_entry_time &= ~XEN_RUNSTATE_UPDATE; - if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, - &state_entry_time, offset, - sizeof(state_entry_time))) - return; + __put_user(state_entry_time, user_times); + smp_wmb(); + + out: + mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT); + + if (atomic) + pagefault_enable(); } int __kvm_xen_has_interrupt(struct kvm_vcpu *v) @@ -443,6 +462,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } + /* It must fit within a single page */ + if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) { + r = -EINVAL; + break; + } + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache, data->u.gpa, @@ -460,6 +485,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } + /* It must fit within a single page */ + if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct pvclock_vcpu_time_info) > PAGE_SIZE) { + r = -EINVAL; + break; + } + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache, data->u.gpa, @@ -481,6 +512,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } + /* It must fit within a single page */ + if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) { + r = -EINVAL; + break; + } + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.xen.runstate_cache, data->u.gpa, diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 520897061ee0..1e3de0769b81 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -3,6 +3,7 @@ #include <linux/linkage.h> #include <linux/error-injection.h> #include <linux/kprobes.h> +#include <linux/objtool.h> asmlinkage void just_return_func(void); @@ -11,6 +12,7 @@ asm( ".type just_return_func, @function\n" ".globl just_return_func\n" "just_return_func:\n" + ANNOTATE_NOENDBR ASM_RET ".size just_return_func, .-just_return_func\n" ); diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 59cf2343f3d9..d0d7b9bc6cad 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. 
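The memcpy_64.S hunk above, like the memmove_64.S and memset_64.S hunks that follow, drops the SYM_FUNC_START_ALIAS/SYM_FUNC_END_ALIAS pair and instead defines the strong __memcpy() body first and then declares memcpy as a weak alias with SYM_FUNC_ALIAS_WEAK(). A minimal C-level sketch of the same shape, using GCC's weak/alias attributes and hypothetical my_memcpy()/__my_memcpy() names rather than the kernel's assembler macros:

void *__my_memcpy(void *dst, const void *src, unsigned long n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	/* Strong implementation: callers of the double-underscore name always reach this body. */
	while (n--)
		*d++ = *s++;
	return dst;
}

/* The plain name is only a weak alias, so another definition can still override it. */
void *my_memcpy(void *dst, const void *src, unsigned long n)
	__attribute__((weak, alias("__my_memcpy")));

Keeping the weak name as a separate alias, and exporting both, lets instrumented builds interpose on the plain name without duplicating the implementation.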
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 50ea390df712..d83cba364e31 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,7 +24,6 @@ * Output: * rax: dest */ -SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) mov %rdi, %rax @@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove) 13: RET SYM_FUNC_END(__memmove) -SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) + +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index d624f2bc42f1..fc9ffd3ff3b2 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -42,10 +41,11 @@ SYM_FUNC_START(__memset) movq %r9,%rax RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 89b3fb244e15..5f87bab4fb8d 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -34,7 +34,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE .endm @@ -55,6 +55,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) .align RETPOLINE_THUNK_SIZE SYM_CODE_START(__x86_indirect_thunk_array) + ANNOTATE_NOENDBR // apply_retpolines #define GEN(reg) THUNK reg #include <asm/GEN-for-each-reg.h> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 026031b3b782..17a492c27306 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -615,6 +615,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr, static bool memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { + struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; @@ -627,6 +628,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to memremap setup_data entry\n"); + return false; + } paddr_next = data->next; len = data->len; @@ -636,10 +641,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, return true; } - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - paddr = ((struct setup_indirect *)data->data)->addr; - len = ((struct setup_indirect *)data->data)->len; + if (data->type == SETUP_INDIRECT) { + memunmap(data); + data = memremap(paddr, sizeof(*data) + len, + MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to memremap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } } memunmap(data); @@ -660,22 +676,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { + 
struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; paddr = boot_params.hdr.setup_data; while (paddr) { - unsigned int len; + unsigned int len, size; if (phys_addr == paddr) return true; data = early_memremap_decrypted(paddr, sizeof(*data)); + if (!data) { + pr_warn("failed to early memremap setup_data entry\n"); + return false; + } + + size = sizeof(*data); paddr_next = data->next; len = data->len; - early_memunmap(data, sizeof(*data)); + if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { + early_memunmap(data, sizeof(*data)); + return true; + } + + if (data->type == SETUP_INDIRECT) { + size += len; + early_memunmap(data, sizeof(*data)); + data = early_memremap_decrypted(paddr, size); + if (!data) { + pr_warn("failed to early memremap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } + } + + early_memunmap(data, size); if ((phys_addr > paddr) && (phys_addr < (paddr + len))) return true; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2b1e266ff95c..b592ea0fc150 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -46,6 +46,12 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT4_off32(b1, b2, b3, b4, off) \ do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) +#ifdef CONFIG_X86_KERNEL_IBT +#define EMIT_ENDBR() EMIT(gen_endbr(), 4) +#else +#define EMIT_ENDBR() +#endif + static bool is_imm8(int value) { return value <= 127 && value >= -128; @@ -241,7 +247,7 @@ struct jit_context { /* Number of bytes emit_patch() needs to generate instructions */ #define X86_PATCH_SIZE 5 /* Number of bytes that will be skipped on tailcall */ -#define X86_TAIL_CALL_OFFSET 11 +#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE) static void push_callee_regs(u8 **pprog, bool *callee_regs_used) { @@ -286,6 +292,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, /* BPF trampoline can be made to work without these nops, * but let's waste 5 bytes for now and optimize later */ + EMIT_ENDBR(); memcpy(prog, x86_nops[5], X86_PATCH_SIZE); prog += X86_PATCH_SIZE; if (!ebpf_from_cbpf) { @@ -296,6 +303,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + + /* X86_TAIL_CALL_OFFSET is here */ + EMIT_ENDBR(); + /* sub rsp, rounded_stack_depth */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); @@ -384,6 +395,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, /* BPF poking in modules is not supported */ return -EINVAL; + /* + * See emit_prologue(), for IBT builds the trampoline hook is preceded + * with an ENDBR instruction. 
+ */ + if (is_endbr(*(u32 *)ip)) + ip += ENDBR_INSN_SIZE; + return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true); } @@ -394,7 +412,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) u8 *prog = *pprog; #ifdef CONFIG_RETPOLINE - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) { + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { @@ -2024,14 +2042,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ip_off = stack_size; - if (flags & BPF_TRAMP_F_SKIP_FRAME) + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip patched call instruction and point orig_call to actual * body of the kernel function. */ + if (is_endbr(*(u32 *)orig_call)) + orig_call += ENDBR_INSN_SIZE; orig_call += X86_PATCH_SIZE; + } prog = image; + EMIT_ENDBR(); EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 84b09c230cbd..a50245157685 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y KASAN_SANITIZE := n GCOV_PROFILE := n diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 25799d768624..854dd81804b7 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -20,12 +20,14 @@ */ #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/page_types.h> #include <asm/segment.h> .text .code64 -SYM_CODE_START(__efi64_thunk) +SYM_FUNC_START(__efi64_thunk) +STACK_FRAME_NON_STANDARD __efi64_thunk push %rbp push %rbx @@ -79,7 +81,7 @@ SYM_CODE_START(__efi64_thunk) 2: pushl $__KERNEL_CS pushl %ebp lret -SYM_CODE_END(__efi64_thunk) +SYM_FUNC_END(__efi64_thunk) .bss .balign 8 diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 42300941ec29..517a9d8d8f94 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -9,6 +9,7 @@ #include <xen/events.h> #include <xen/interface/memory.h> +#include <asm/apic.h> #include <asm/cpu.h> #include <asm/smp.h> #include <asm/io_apic.h> @@ -184,8 +185,7 @@ static int xen_cpu_dead_hvm(unsigned int cpu) if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) xen_teardown_timer(cpu); - - return 0; + return 0; } static bool no_vector_callback __initdata; @@ -242,15 +242,14 @@ static __init int xen_parse_no_vector_callback(char *arg) } early_param("xen_no_vector_callback", xen_parse_no_vector_callback); -bool __init xen_hvm_need_lapic(void) +static __init bool xen_x2apic_available(void) { - if (xen_pv_domain()) - return false; - if (!xen_hvm_domain()) - return false; - if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback) - return false; - return true; + return x2apic_supported(); +} + +static bool __init msi_ext_dest_id(void) +{ + return cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_EXT_DEST_ID; } static __init void xen_hvm_guest_late_init(void) @@ -312,9 +311,10 @@ struct hypervisor_x86 x86_hyper_xen_hvm __initdata = { .detect = xen_platform_hvm, .type = X86_HYPER_XEN_HVM, .init.init_platform = xen_hvm_guest_init, - .init.x2apic_available = xen_x2apic_para_available, + .init.x2apic_available = xen_x2apic_available, .init.init_mem_mapping = xen_hvm_init_mem_mapping, .init.guest_late_init = 
xen_hvm_guest_late_init, + .init.msi_ext_dest_id = msi_ext_dest_id, .runtime.pin_vcpu = xen_pin_vcpu, .ignore_nopv = true, }; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5004feb16783..5038edb79ad5 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -624,6 +624,9 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_coprocessor_error, false ), TRAP_ENTRY(exc_alignment_check, false ), TRAP_ENTRY(exc_simd_coprocessor_error, false ), +#ifdef CONFIG_X86_KERNEL_IBT + TRAP_ENTRY(exc_control_protection, false ), +#endif }; static bool __ref get_trap_addr(void **addr, unsigned int ist) @@ -1177,6 +1180,8 @@ static void __init xen_domu_set_legacy_features(void) x86_platform.legacy.rtc = 0; } +extern void early_xen_iret_patch(void); + /* First C function to be called on Xen boot */ asmlinkage __visible void __init xen_start_kernel(void) { @@ -1187,6 +1192,10 @@ asmlinkage __visible void __init xen_start_kernel(void) if (!xen_start_info) return; + __text_gen_insn(&early_xen_iret_patch, + JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, + JMP32_INSN_SIZE); + xen_domain_type = XEN_PV_DOMAIN; xen_start_flags = xen_start_info->flags; @@ -1195,7 +1204,6 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; pv_ops.cpu = xen_cpu_ops.cpu; - paravirt_iret = xen_iret; xen_init_irq_ops(); /* @@ -1341,10 +1349,6 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_acpi_sleep_register(); - /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - xen_boot_params_init_edd(); #ifdef CONFIG_ACPI diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 6a8f3b53ab83..4a6019238ee7 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -148,28 +148,12 @@ int xen_smp_intr_init_pv(unsigned int cpu) return rc; } -static void __init xen_fill_possible_map(void) -{ - int i, rc; - - if (xen_initial_domain()) - return; - - for (i = 0; i < nr_cpu_ids; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) { - num_processors++; - set_cpu_possible(i, true); - } - } -} - -static void __init xen_filter_cpu_maps(void) +static void __init _get_smp_config(unsigned int early) { int i, rc; unsigned int subtract = 0; - if (!xen_initial_domain()) + if (early) return; num_processors = 0; @@ -210,7 +194,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) * sure the old memory can be recycled. 
*/ make_lowmem_page_readwrite(xen_initial_gdt); - xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); /* @@ -476,5 +459,8 @@ static const struct smp_ops xen_smp_ops __initconst = { void __init xen_smp_init(void) { smp_ops = xen_smp_ops; - xen_fill_possible_map(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = _get_smp_config; } diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index 31b1e3477cb6..14ea32e734d5 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -57,6 +57,14 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; + if (size >= offsetof(struct dom0_vga_console_info, + u.vesa_lfb.ext_lfb_base) + + sizeof(info->u.vesa_lfb.ext_lfb_base) + && info->u.vesa_lfb.ext_lfb_base) { + screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base; + screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + } + if (info->video_type == XEN_VGATYPE_EFI_LFB) { screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; break; @@ -66,14 +74,6 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) u.vesa_lfb.mode_attrs) + sizeof(info->u.vesa_lfb.mode_attrs)) screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; - - if (size >= offsetof(struct dom0_vga_console_info, - u.vesa_lfb.ext_lfb_base) - + sizeof(info->u.vesa_lfb.ext_lfb_base) - && info->u.vesa_lfb.ext_lfb_base) { - screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base; - screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; - } break; } } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index e730e6200e64..caa9bc2fa100 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -122,6 +122,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) UNWIND_HINT_EMPTY + ENDBR pop %rcx pop %r11 jmp \name @@ -147,6 +148,9 @@ xen_pv_trap asm_exc_page_fault xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_alignment_check +#ifdef CONFIG_X86_KERNEL_IBT +xen_pv_trap asm_exc_control_protection +#endif #ifdef CONFIG_X86_MCE xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ @@ -162,6 +166,7 @@ SYM_CODE_START(xen_early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS UNWIND_HINT_EMPTY + ENDBR pop %rcx pop %r11 jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE @@ -189,6 +194,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 */ SYM_CODE_START(xen_iret) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR pushq $0 jmp hypercall_iret SYM_CODE_END(xen_iret) @@ -230,6 +236,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) /* Normal 64-bit system call target */ SYM_CODE_START(xen_syscall_target) UNWIND_HINT_EMPTY + ENDBR popq %rcx popq %r11 @@ -249,6 +256,7 @@ SYM_CODE_END(xen_syscall_target) /* 32-bit compat syscall target */ SYM_CODE_START(xen_syscall32_target) UNWIND_HINT_EMPTY + ENDBR popq %rcx popq %r11 @@ -266,6 +274,7 @@ SYM_CODE_END(xen_syscall32_target) /* 32-bit compat sysenter target */ SYM_CODE_START(xen_sysenter_target) UNWIND_HINT_EMPTY + ENDBR /* * NB: Xen is polite and clears TF from EFLAGS for us. This means * that we don't need to guard against single step exceptions here. 
@@ -289,6 +298,7 @@ SYM_CODE_END(xen_sysenter_target) SYM_CODE_START(xen_syscall32_target) SYM_CODE_START(xen_sysenter_target) UNWIND_HINT_EMPTY + ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 11d286529fe5..ac17196e2518 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -25,8 +25,12 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC - .skip 31, 0x90 - RET + ANNOTATE_NOENDBR + ret + /* + * Xen will write the hypercall page, and sort out ENDBR. + */ + .skip 31, 0xcc .endr #define HYPERCALL(n) \ @@ -74,6 +78,7 @@ SYM_CODE_END(startup_xen) .pushsection .text SYM_CODE_START(asm_cpu_bringup_and_idle) UNWIND_HINT_EMPTY + ENDBR call cpu_bringup_and_idle SYM_CODE_END(asm_cpu_bringup_and_idle) |
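The svm_deliver_interrupt()/svm_complete_interrupt_delivery() hunks and the smp_store_release() change in vcpu_enter_guest() together form a release/acquire protocol: apicv_active is written before vcpu->mode is store-released to IN_GUEST_MODE, and the delivery side sets the vIRR bit, issues smp_mb__after_atomic(), then reads vcpu->mode before apicv_active. Below is a stand-alone sketch of that ordering expressed in C11 atomics; the struct, field names and needs_doorbell() helper are hypothetical stand-ins, not KVM code:

#include <stdatomic.h>
#include <stdbool.h>

enum vcpu_mode { OUTSIDE_GUEST_MODE, IN_GUEST_MODE };

/* Hypothetical, stripped-down stand-ins for the KVM structures. */
struct vcpu {
	_Atomic enum vcpu_mode mode;
	_Atomic bool apicv_active;
	_Atomic unsigned long virr;	/* one word standing in for the vIRR */
};

/*
 * Entry side: the store-release on mode makes everything written before
 * it (here apicv_active) visible to any reader that load-acquires mode
 * and observes IN_GUEST_MODE.
 */
static void enter_guest(struct vcpu *v, bool apicv)
{
	atomic_store_explicit(&v->apicv_active, apicv, memory_order_relaxed);
	atomic_store_explicit(&v->mode, IN_GUEST_MODE, memory_order_release);
}

/*
 * Delivery side: set the vIRR bit with full ordering (standing in for
 * smp_mb__after_atomic()), then load-acquire mode so apicv_active is
 * only read afterwards.  Returns true when ringing the doorbell is the
 * right action; false means kick or wake the vCPU instead.
 */
static bool needs_doorbell(struct vcpu *v, int vector)
{
	atomic_fetch_or_explicit(&v->virr, 1UL << (vector & 63),
				 memory_order_seq_cst);

	if (atomic_load_explicit(&v->mode, memory_order_acquire) != IN_GUEST_MODE)
		return false;

	return atomic_load_explicit(&v->apicv_active, memory_order_relaxed);
}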
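Several hunks above build on IBT: emulate.c grows the FASTOP stubs by ASM_ENDBR, the BPF JIT emits EMIT_ENDBR() in its prologue, and bpf_arch_text_poke()/arch_prepare_bpf_trampoline() step over an ENDBR with is_endbr() before patching. A minimal sketch of that skip, assuming x86-64 (little-endian) and hard-coding the endbr64 encoding instead of using the kernel's is_endbr() helper:

#include <stdint.h>
#include <string.h>

#define ENDBR64_INSN	0xfa1e0ff3u	/* bytes f3 0f 1e fa read as a little-endian u32 */
#define ENDBR_INSN_SIZE	4

/* Return nonzero if the four bytes at ip encode endbr64. */
static int insn_is_endbr64(const void *ip)
{
	uint32_t insn;

	memcpy(&insn, ip, sizeof(insn));
	return insn == ENDBR64_INSN;
}

/*
 * Before patching the call/jmp at a function's entry, step over the
 * ENDBR that an IBT-enabled build placed there, so the patch lands on
 * the bytes the prologue actually reserved for it.
 */
static void *skip_endbr64(void *ip)
{
	if (insn_is_endbr64(ip))
		ip = (uint8_t *)ip + ENDBR_INSN_SIZE;
	return ip;
}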