Diffstat (limited to 'arch'): 88 files changed, 969 insertions, 340 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..393d71124f5d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -256,6 +256,7 @@ config ARM64 select HOTPLUG_SMT if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING + select JUMP_LABEL select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MODULES_USE_ELF_RELA diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 0baf256b4400..983b2f0e8797 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -687,11 +687,12 @@ }; wdog0: watchdog@2ad0000 { - compatible = "fsl,imx21-wdt"; + compatible = "fsl,ls1046a-wdt", "fsl,imx21-wdt"; reg = <0x0 0x2ad0000 0x0 0x10000>; interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(2)>; + big-endian; }; edma0: dma-controller@2c00000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index d29710772569..1594ce9182a5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -464,6 +464,7 @@ }; reg_nvcc_sd: LDO5 { + regulator-always-on; regulator-max-microvolt = <3300000>; regulator-min-microvolt = <1800000>; regulator-name = "On-module +V3.3_1.8_SD (LDO5)"; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi index 2f740d74707b..4bf818873fe3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi @@ -70,7 +70,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi index 5ab3ffe9931d..cf747ec6fa16 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi @@ -110,7 +110,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index e2b5e7ac3e46..5eb114d2360a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -122,7 +122,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts index 6daa2313f879..568d24265ddf 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts @@ -201,7 +201,7 @@ tpm@0 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x0>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts index 6c47f4b47356..9f4d0899a94d 100644 --- a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts @@ -574,17 +574,17 @@ &scmi_iomuxc { pinctrl_emdio: emdiogrp { 
fsl,pins = < - IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x57e - IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e + IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x50e + IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e >; }; pinctrl_enetc0: enetc0grp { fsl,pins = < - IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e - IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e - IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e - IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e + IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e + IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e + IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e + IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e @@ -598,10 +598,10 @@ pinctrl_enetc1: enetc1grp { fsl,pins = < - IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x57e - IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x57e - IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x57e - IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x57e + IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x50e + IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x50e + IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x50e + IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x50e IMX95_PAD_ENET2_TX_CTL__NETCMIX_TOP_ETH1_RGMII_TX_CTL 0x57e IMX95_PAD_ENET2_TXC__NETCMIX_TOP_ETH1_RGMII_TX_CLK 0x58e IMX95_PAD_ENET2_RX_CTL__NETCMIX_TOP_ETH1_RGMII_RX_CTL 0x57e diff --git a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts index 6886ea766655..d7d845231312 100644 --- a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts @@ -566,17 +566,17 @@ &scmi_iomuxc { pinctrl_emdio: emdiogrp{ fsl,pins = < - IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x57e - IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e + IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x50e + IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e >; }; pinctrl_enetc0: enetc0grp { fsl,pins = < - IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e - IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e - IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e - IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e + IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e + IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e + IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e + IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 632631a29112..5aecdd9b62ff 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1708,7 +1708,7 @@ <0x9 0 1 0>; reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space"; num-lanes = <1>; - interrupts = <GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "dma"; clocks = <&scmi_clk IMX95_CLK_HSIO>, <&scmi_clk IMX95_CLK_HSIOPLL>, diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index ae7a275fd223..cefecb7a23cf 100644 --- 
a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -1090,6 +1090,8 @@ }; &pmk8280_rtc { + qcom,uefi-rtc-info; + status = "okay"; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi index c02fd4d15c96..e3888bc143a0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi @@ -224,6 +224,7 @@ reg-names = "rtc", "alarm"; interrupts = <0x0 0x62 0x1 IRQ_TYPE_EDGE_RISING>; qcom,no-alarm; /* alarm owned by ADSP */ + qcom,uefi-rtc-info; }; pmk8550_sdam_2: nvram@7100 { diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi index ab232e5c7ad6..4203b335a263 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi @@ -379,6 +379,18 @@ <0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_up>; }; }; + + spi1 { + spi1_csn0_gpio_pin: spi1-csn0-gpio-pin { + rockchip,pins = + <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up_4ma>; + }; + + spi1_csn1_gpio_pin: spi1-csn1-gpio-pin { + rockchip,pins = + <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up_4ma>; + }; + }; }; &pmu_io_domains { @@ -396,6 +408,17 @@ vqmmc-supply = <&vccio_sd>; }; +&spi1 { + /* + * Hardware CS has a very slow rise time of about 6us, + * causing transmission errors. + * With cs-gpios we have a rise time of about 20ns. + */ + cs-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>, <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&spi1_clk &spi1_csn0_gpio_pin &spi1_csn1_gpio_pin &spi1_miso &spi1_mosi>; +}; + &tsadc { status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 3c127c5c2607..a9021c524afb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -30,6 +30,7 @@ fan: gpio_fan { compatible = "gpio-fan"; + fan-supply = <&vcc12v_dcin>; gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>; gpio-fan,speed-map = < 0 0>, diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts index b09e789c75c4..801b40fea4e8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts @@ -211,10 +211,38 @@ status = "okay"; }; +&cpu_b0 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b1 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b2 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b3 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + &cpu_l0 { cpu-supply = <&vdd_cpu_lit_s0>; }; +&cpu_l1 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + +&cpu_l2 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + +&cpu_l3 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + &gmac0 { phy-mode = "rgmii-id"; clock_in_out = "output"; diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi index 1086482f0479..64812e3bcb61 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi @@ -615,7 +615,7 @@ <0 0 0 2 &pcie1_intc 1>, <0 0 0 3 &pcie1_intc 2>, <0 0 0 4 &pcie1_intc 3>; - linux,pci-domain = <0>; + linux,pci-domain = <1>; max-link-speed = <2>; num-ib-windows = <8>; num-viewport = <8>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi index 7f874c77410c..6584d73660f6 100644 --- 
a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi @@ -578,14 +578,14 @@ hdmim0_tx0_scl: hdmim0-tx0-scl { rockchip,pins = /* hdmim0_tx0_scl */ - <4 RK_PB7 5 &pcfg_pull_none>; + <4 RK_PB7 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim0_tx0_sda: hdmim0-tx0-sda { rockchip,pins = /* hdmim0_tx0_sda */ - <4 RK_PC0 5 &pcfg_pull_none>; + <4 RK_PC0 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -640,14 +640,14 @@ hdmim1_tx0_scl: hdmim1-tx0-scl { rockchip,pins = /* hdmim1_tx0_scl */ - <0 RK_PD5 11 &pcfg_pull_none>; + <0 RK_PD5 11 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim1_tx0_sda: hdmim1-tx0-sda { rockchip,pins = /* hdmim1_tx0_sda */ - <0 RK_PD4 11 &pcfg_pull_none>; + <0 RK_PD4 11 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -668,14 +668,14 @@ hdmim1_tx1_scl: hdmim1-tx1-scl { rockchip,pins = /* hdmim1_tx1_scl */ - <3 RK_PC6 5 &pcfg_pull_none>; + <3 RK_PC6 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim1_tx1_sda: hdmim1-tx1-sda { rockchip,pins = /* hdmim1_tx1_sda */ - <3 RK_PC5 5 &pcfg_pull_none>; + <3 RK_PC5 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ hdmim2_rx_cec: hdmim2-rx-cec { @@ -709,14 +709,14 @@ hdmim2_tx0_scl: hdmim2-tx0-scl { rockchip,pins = /* hdmim2_tx0_scl */ - <3 RK_PC7 5 &pcfg_pull_none>; + <3 RK_PC7 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim2_tx0_sda: hdmim2-tx0-sda { rockchip,pins = /* hdmim2_tx0_sda */ - <3 RK_PD0 5 &pcfg_pull_none>; + <3 RK_PD0 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -730,14 +730,14 @@ hdmim2_tx1_scl: hdmim2-tx1-scl { rockchip,pins = /* hdmim2_tx1_scl */ - <1 RK_PA4 5 &pcfg_pull_none>; + <1 RK_PA4 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim2_tx1_sda: hdmim2-tx1-sda { rockchip,pins = /* hdmim2_tx1_sda */ - <1 RK_PA3 5 &pcfg_pull_none>; + <1 RK_PA3 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi index cc37f082adea..b07543315f87 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi @@ -321,6 +321,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi index 244c66faa161..fb48ddc04bcb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi @@ -160,14 +160,15 @@ hdmim0_tx1_scl: hdmim0-tx1-scl { rockchip,pins = /* hdmim0_tx1_scl */ - <2 RK_PB5 4 &pcfg_pull_none>; + <2 RK_PB5 4 &pcfg_pull_none_drv_level_3_smt>; }; /omit-if-no-ref/ hdmim0_tx1_sda: hdmim0-tx1-sda { rockchip,pins = /* hdmim0_tx1_sda */ - <2 RK_PB4 4 &pcfg_pull_none>; + <2 RK_PB4 4 &pcfg_pull_none_drv_level_1_smt>; + }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts index 8b717c4017a4..b2947b36fada 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts @@ -474,6 +474,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi 
b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi index 5c645437b507..b0475b7c655a 100644 --- a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi +++ b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi @@ -333,6 +333,41 @@ }; /omit-if-no-ref/ + pcfg_pull_none_drv_level_1_smt: pcfg-pull-none-drv-level-1-smt { + bias-disable; + drive-strength = <1>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_2_smt: pcfg-pull-none-drv-level-2-smt { + bias-disable; + drive-strength = <2>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_3_smt: pcfg-pull-none-drv-level-3-smt { + bias-disable; + drive-strength = <3>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_4_smt: pcfg-pull-none-drv-level-4-smt { + bias-disable; + drive-strength = <4>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_5_smt: pcfg-pull-none-drv-level-5-smt { + bias-disable; + drive-strength = <5>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ pcfg_output_high: pcfg-output-high { output-high; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 7e04a2905ce4..eb5c17d4c7ec 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1444,6 +1444,7 @@ CONFIG_PLATFORM_MHU=y CONFIG_BCM2835_MBOX=y CONFIG_QCOM_APCS_IPC=y CONFIG_MTK_ADSP_MBOX=m +CONFIG_QCOM_CPUCP_MBOX=m CONFIG_QCOM_IPCC=y CONFIG_ROCKCHIP_IOMMU=y CONFIG_TEGRA_IOMMU_SMMU=y diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ba5df0df02a4..9f38340d24c2 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -287,17 +287,6 @@ .Lskip_fgt2_\@: .endm -.macro __init_el2_gcs - mrs_s x1, SYS_ID_AA64PFR1_EL1 - ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 - cbz x1, .Lskip_gcs_\@ - - /* Ensure GCS is not enabled when we start trying to do BLs */ - msr_s SYS_GCSCR_EL1, xzr - msr_s SYS_GCSCRE0_EL1, xzr -.Lskip_gcs_\@: -.endm - /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. 
Note that everything gets initialised as @@ -319,7 +308,6 @@ __init_el2_cptr __init_el2_fgt __init_el2_fgt2 - __init_el2_gcs .endm #ifndef __KVM_NVHE_HYPERVISOR__ @@ -371,6 +359,13 @@ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 .Lskip_mpam_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2 + +.Linit_gcs_\@: + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr + +.Lskip_gcs_\@: check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 .Linit_sve_\@: /* SVE register access */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d27079968341..3e41a880b062 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1480,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2920b0a51403..a2faf0049dab 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ syscall.o proton-pack.o idle.o patching.o pi/ \ - rsi.o + rsi.o jump_label.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o @@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b34044e20128..e151585c6cca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3135,6 +3135,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope) } #endif +#ifdef CONFIG_ARM64_SME +static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope) +{ + return system_supports_sme() && has_user_cpuid_feature(cap, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL), HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES), @@ -3223,31 +3230,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, 
KERNEL_HWCAP_SME_I16I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), - HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), - HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), - HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, 
CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), #endif /* CONFIG_ARM64_SME */ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3857fd7ee8d4..62230d6dd919 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -15,6 +15,7 @@ #include <asm/efi.h> #include <asm/stacktrace.h> +#include <asm/vmap_stack.h> static bool region_is_misaligned(const efi_memory_desc_t *md) { @@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, - NUMA_NO_NODE, &&l); -l: if (!p) { + if (!IS_ENABLED(CONFIG_VMAP_STACK)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); + if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return -ENOMEM; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5954cec19660..08b7042a2e2d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -673,6 +673,11 @@ static void permission_overlay_switch(struct task_struct *next) current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); if (current->thread.por_el0 != next->thread.por_el0) { write_sysreg_s(next->thread.por_el0, SYS_POR_EL0); + /* + * No ISB required as we can tolerate spurious Overlay faults - + * the fault handler will check again based on the new value + * of POR_EL0. 
+ */ } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b3f6b56e733..21a795303568 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1143,7 +1143,7 @@ static inline unsigned int num_other_online_cpus(void) void smp_send_stop(void) { static unsigned long stop_in_progress; - cpumask_t mask; + static cpumask_t mask; unsigned long timeout; /* diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 38a91bb5d4c7..23dd3f3fc3eb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (!kvm_arm_vcpu_is_finalized(vcpu)) return -EPERM; - ret = kvm_arch_vcpu_run_map_fp(vcpu); - if (ret) - return ret; - if (likely(vcpu_has_run_once(vcpu))) return 0; @@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard) static void cpu_hyp_uninit(void *discard) { - if (__this_cpu_read(kvm_hyp_initialized)) { + if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) { cpu_hyp_reset(); __this_cpu_write(kvm_hyp_initialized, 0); } @@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { + if (per_cpu(kvm_hyp_initialized, cpu)) + continue; + free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); - free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + + if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]) + continue; if (free_sve) { struct cpu_sve_state *sve_state; @@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void) sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; free_pages((unsigned long) sve_state, pkvm_host_sve_state_order()); } + + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8f6c8f57c6b9..15e17aca1dec 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -15,32 +15,6 @@ #include <asm/sysreg.h> /* - * Called on entry to KVM_RUN unless this vcpu previously ran at least - * once and the most recent prior KVM_RUN for this vcpu was called from - * the same task as current (highly likely). - * - * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu), - * such that on entering hyp the relevant parts of current are already - * mapped. - */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) -{ - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; - int ret; - - /* pKVM has its own tracking of the host fpsimd state. */ - if (is_protected_kvm_enabled()) - return 0; - - /* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); - if (ret) - return ret; - - return 0; -} - -/* * Prepare vcpu for saving the host's FPSIMD state and loading the guest's. * The actual loading is done by the FPSIMD access trap taken to hyp. 
* diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 95d7534c9679..8957734d6183 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; + u64 granule; s8 level; int ret; @@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return -EPERM; } - do { - u64 granule = kvm_granule_size(level); + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { + if (!kvm_level_supports_block_mapping(level)) + continue; + granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - level++; - } while ((level <= KVM_PGTABLE_LAST_LEVEL) && - !(kvm_level_supports_block_mapping(level) && - range_included(&cur, range))); + if (!range_included(&cur, range)) + continue; + *range = cur; + return 0; + } - *range = cur; + WARN_ON(1); - return 0; + return -EINVAL; } int host_stage2_idmap_locked(phys_addr_t addr, u64 size, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 5b191f4dc566..dc1d26559bfa 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) } } +#define has_tgran_2(__r, __sz) \ + ({ \ + u64 _s1, _s2, _mmfr0 = __r; \ + \ + _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz##_2, _mmfr0); \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ + _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ + (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ + }) /* * Our emulated CPU doesn't support all the possible features. 
For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) */ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) { + u64 orig_val = val; + switch (reg) { case SYS_ID_AA64ISAR0_EL1: /* Support everything but TME */ @@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) */ switch (PAGE_SIZE) { case SZ_4K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); + if (has_tgran_2(orig_val, 4)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); fallthrough; case SZ_16K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); + if (has_tgran_2(orig_val, 16)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); fallthrough; case SZ_64K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); + if (has_tgran_2(orig_val, 64)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); break; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 76c2f0da821f..c20bd6f21e60 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2624,7 +2624,7 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, */ if (hpmn > vcpu->kvm->arch.nr_pmu_counters) { hpmn = vcpu->kvm->arch.nr_pmu_counters; - u64_replace_bits(val, hpmn, MDCR_EL2_HPMN); + u64p_replace_bits(&val, hpmn, MDCR_EL2_HPMN); } __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index a50fb7e6841f..679aafe77de2 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -401,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) { bool level; - level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En; - if (level) - level &= vgic_v3_get_misr(vcpu); + level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu); kvm_vgic_inject_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.mi_intid, level, vcpu); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ec0a337891dd..11eb8d1adc84 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr, } } -static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, - unsigned int mm_flags) +static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags) { - unsigned long iss2 = ESR_ELx_ISS2(esr); - if (!system_supports_poe()) return false; - if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay)) - return true; - + /* + * We do not check whether an Overlay fault has occurred because we + * cannot make a decision based solely on its value: + * + * - If Overlay is set, a fault did occur due to POE, but it may be + * spurious in those cases where we update POR_EL0 without ISB (e.g. + * on context-switch). We would then need to manually check POR_EL0 + * against vma_pkey(vma), which is exactly what + * arch_vma_access_permitted() does. + * + * - If Overlay is not set, we may still need to report a pkey fault. + * This is the case if an access was made within a mapping but with no + * page mapped, and POR_EL0 forbids the access (according to + * vma_pkey()). Such access will result in a SIGSEGV regardless + * because core code checks arch_vma_access_permitted(), but in order + * to report the correct error code - SEGV_PKUERR - we must handle + * that case here. 
+ */ return !arch_vma_access_permitted(vma, mm_flags & FAULT_FLAG_WRITE, mm_flags & FAULT_FLAG_INSTRUCTION, @@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); vma_end_read(vma); fault = 0; @@ -679,7 +691,7 @@ retry: goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); mmap_read_unlock(mm); fault = 0; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 80d470aa469d..54dccfd6aa11 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -518,7 +518,6 @@ alternative_else_nop_endif msr REG_PIR_EL1, x0 orr tcr2, tcr2, TCR2_EL1_PIE - msr REG_TCR2_EL1, x0 .Lskip_indirection: diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d71ea0f4466f..1c5544401530 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -98,6 +98,7 @@ config RISCV select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND + select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE @@ -162,7 +163,7 @@ config RISCV select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_FUNCTION_GRAPH_FREGS - select HAVE_FUNCTION_TRACER if !XIP_KERNEL + select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE select HAVE_EBPF_JIT if MMU select HAVE_GUP_FAST if MMU select HAVE_FUNCTION_ARG_ACCESS_API diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h index 3b643b9efc07..5acce285e56e 100644 --- a/arch/riscv/include/asm/kvm_aia.h +++ b/arch/riscv/include/asm/kvm_aia.h @@ -87,6 +87,9 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); extern struct kvm_device_ops kvm_riscv_aia_device_ops; +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu); +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu); @@ -161,7 +164,6 @@ void kvm_riscv_aia_destroy_vm(struct kvm *kvm); int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, void __iomem **hgei_va, phys_addr_t *hgei_pa); void kvm_riscv_aia_free_hgei(int cpu, int hgei); -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable); void kvm_riscv_aia_enable(void); void kvm_riscv_aia_disable(void); diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 85cfebc32e4c..bcbf8b1ec115 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -306,6 +306,9 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; } +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 525e50db24f7..b88a6218b7f2 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -311,8 +311,8 @@ do { \ do { \ if 
(!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \ - __inttype(x) val = (__inttype(x))x; \ - if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(val), sizeof(*__gu_ptr))) \ + __inttype(x) ___val = (__inttype(x))x; \ + if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(___val), sizeof(*__gu_ptr))) \ goto label; \ break; \ } \ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 4c6c24380cfd..8d18d6727f0f 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -14,6 +14,18 @@ #include <asm/text-patching.h> #ifdef CONFIG_DYNAMIC_FTRACE +void ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) +{ + mutex_lock(&text_mutex); +} + +void ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) +{ + mutex_unlock(&text_mutex); +} + unsigned long ftrace_call_adjust(unsigned long addr) { if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) @@ -29,10 +41,8 @@ unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) void arch_ftrace_update_code(int command) { - mutex_lock(&text_mutex); command |= FTRACE_MAY_SLEEP; ftrace_modify_all_code(command); - mutex_unlock(&text_mutex); flush_icache_all(); } @@ -149,6 +159,8 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) unsigned int nops[2], offset; int ret; + guard(mutex)(&text_mutex); + ret = ftrace_rec_set_nop_ops(rec); if (ret) return ret; @@ -157,9 +169,7 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) nops[0] = to_auipc_t0(offset); nops[1] = RISCV_INSN_NOP4; - mutex_lock(&text_mutex); ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE); - mutex_unlock(&text_mutex); return ret; } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 9c83848797a7..80230de167de 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/irqflags.h> #include <linux/randomize_kstack.h> #include <linux/sched.h> #include <linux/sched/debug.h> @@ -151,7 +152,9 @@ asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ { \ if (user_mode(regs)) { \ irqentry_enter_from_user_mode(regs); \ + local_irq_enable(); \ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ + local_irq_disable(); \ irqentry_exit_to_user_mode(regs); \ } else { \ irqentry_state_t state = irqentry_nmi_enter(regs); \ @@ -173,17 +176,14 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); - local_irq_enable(); handled = riscv_v_first_use_handler(regs); - - local_irq_disable(); - if (!handled) do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc, "Oops - illegal instruction"); + local_irq_disable(); irqentry_exit_to_user_mode(regs); } else { irqentry_state_t state = irqentry_nmi_enter(regs); @@ -308,9 +308,11 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) { if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); + local_irq_enable(); handle_break(regs); + local_irq_disable(); irqentry_exit_to_user_mode(regs); } else { irqentry_state_t state = irqentry_nmi_enter(regs); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 93043924fe6c..f760e4fcc052 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -461,7 +461,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) } if 
(!fp) - SET_RD(insn, regs, val.data_ulong << shift >> shift); + SET_RD(insn, regs, (long)(val.data_ulong << shift) >> shift); else if (len == 8) set_f64_rd(insn, regs, val.data_u64); else diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 19afd1f23537..dad318185660 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -30,28 +30,6 @@ unsigned int kvm_riscv_aia_nr_hgei; unsigned int kvm_riscv_aia_max_ids; DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available); -static int aia_find_hgei(struct kvm_vcpu *owner) -{ - int i, hgei; - unsigned long flags; - struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei); - - raw_spin_lock_irqsave(&hgctrl->lock, flags); - - hgei = -1; - for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) { - if (hgctrl->owners[i] == owner) { - hgei = i; - break; - } - } - - raw_spin_unlock_irqrestore(&hgctrl->lock, flags); - - put_cpu_ptr(&aia_hgei); - return hgei; -} - static inline unsigned long aia_hvictl_value(bool ext_irq_pending) { unsigned long hvictl; @@ -95,7 +73,6 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) { - int hgei; unsigned long seip; if (!kvm_riscv_aia_available()) @@ -114,11 +91,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip) return false; - hgei = aia_find_hgei(vcpu); - if (hgei > 0) - return !!(ncsr_read(CSR_HGEIP) & BIT(hgei)); - - return false; + return kvm_riscv_vcpu_aia_imsic_has_interrupt(vcpu); } void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) @@ -164,6 +137,9 @@ void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_HVIPRIO2H, csr->hviprio2h); #endif } + + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_load(vcpu, cpu); } void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) @@ -174,6 +150,9 @@ void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) if (!kvm_riscv_aia_available()) return; + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_put(vcpu); + if (kvm_riscv_nacl_available()) { nsh = nacl_shmem(); csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT); @@ -472,22 +451,6 @@ void kvm_riscv_aia_free_hgei(int cpu, int hgei) raw_spin_unlock_irqrestore(&hgctrl->lock, flags); } -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable) -{ - int hgei; - - if (!kvm_riscv_aia_available()) - return; - - hgei = aia_find_hgei(owner); - if (hgei > 0) { - if (enable) - csr_set(CSR_HGEIE, BIT(hgei)); - else - csr_clear(CSR_HGEIE, BIT(hgei)); - } -} - static irqreturn_t hgei_interrupt(int irq, void *dev_id) { int i; diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 29ef9c2133a9..2ff865943ebb 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -676,6 +676,48 @@ static void imsic_swfile_update(struct kvm_vcpu *vcpu, imsic_swfile_extirq_update(vcpu); } +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + bool ret = false; + + /* + * The IMSIC SW-file directly injects interrupt via hvip so + * only check for interrupt when IMSIC VS-file is being used. 
+ */ + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); + + return ret; +} + +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu) +{ + /* + * No need to explicitly clear HGEIE CSR bits because the + * hgei interrupt handler (aka hgei_interrupt()) will always + * clear it for us. + */ +} + +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + + if (!kvm_vcpu_is_blocking(vcpu)) + return; + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + csr_set(CSR_HGEIE, BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); +} + void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -781,6 +823,9 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) * producers to the new IMSIC VS-file. */ + /* Ensure HGEIE CSR bit is zero before using the new IMSIC VS-file */ + csr_clear(CSR_HGEIE, BIT(new_vsfile_hgei)); + /* Zero-out new IMSIC VS-file */ imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e0a01af426ff..0462863206ca 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -207,16 +207,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_riscv_vcpu_timer_pending(vcpu); } -void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, true); -} - -void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, false); -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index ff672fa71fcc..85a7262115e1 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -345,8 +345,24 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) /* * The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync() * upon every VM exit so no need to save here. + * + * If VS-timer expires when no VCPU running on a host CPU then + * WFI executed by such host CPU will be effective NOP resulting + * in no power savings. This is because as-per RISC-V Privileged + * specificaiton: "WFI is also required to resume execution for + * locally enabled interrupts pending at any privilege level, + * regardless of the global interrupt enable at each privilege + * level." + * + * To address the above issue, vstimecmp CSR must be set to -1UL + * over here when VCPU is scheduled-out or exits to user space. 
*/ + csr_write(CSR_VSTIMECMP, -1UL); +#if defined(CONFIG_32BIT) + csr_write(CSR_VSTIMECMPH, -1UL); +#endif + /* timer should be enabled for the remaining operations */ if (unlikely(!t->init_done)) return; diff --git a/arch/riscv/tools/relocs_check.sh b/arch/riscv/tools/relocs_check.sh index baeb2e7b2290..742993e6a8cb 100755 --- a/arch/riscv/tools/relocs_check.sh +++ b/arch/riscv/tools/relocs_check.sh @@ -14,7 +14,9 @@ bad_relocs=$( ${srctree}/scripts/relocs_check.sh "$@" | # These relocations are okay # R_RISCV_RELATIVE - grep -F -w -v 'R_RISCV_RELATIVE' + # R_RISCV_NONE + grep -F -w -v 'R_RISCV_RELATIVE +R_RISCV_NONE' ) if [ -z "$bad_relocs" ]; then diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index d229cbd2ba22..9b0d55be1239 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } @@ -60,6 +61,7 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 33711a29618c..6cbbf5e8555f 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -32,6 +32,7 @@ static int sha512_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -57,6 +58,7 @@ static int sha512_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_512; + sctx->first_message_part = 0; return 0; } @@ -97,6 +99,7 @@ static int sha384_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index c7f8313ba449..0c9a35782c83 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -566,7 +566,15 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { memcpy(plt, &bpf_plt, sizeof(*plt)); plt->ret = ret; - plt->target = target; + /* + * (target == NULL) implies that the branch to this PLT entry was + * patched and became a no-op. However, some CPU could have jumped + * to this PLT entry before patching and may be still executing it. + * + * Since the intention in this case is to make the PLT entry a no-op, + * make the target point to the return label instead of NULL. + */ + plt->target = target ?: ret; } /* diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 71019b3b54ea..8bed9030ad47 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -147,7 +147,7 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_NO_INSTR select ARCH_WANT_GENERAL_HUGETLB - select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGE_PMD_SHARE if X86_64 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 @@ -2695,6 +2695,15 @@ config MITIGATION_ITS disabled, mitigation cannot be enabled via cmdline. 
See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst> +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause a leakage. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile index db3255b979bd..342d79f0ab6a 100644 --- a/arch/x86/coco/sev/Makefile +++ b/arch/x86/coco/sev/Makefile @@ -5,5 +5,6 @@ obj-y += core.o sev-nmi.o vc-handle.o # Clang 14 and older may fail to respect __no_sanitize_undefined when inlining UBSAN_SANITIZE_sev-nmi.o := n -# GCC may fail to respect __no_sanitize_address when inlining +# GCC may fail to respect __no_sanitize_address or __no_kcsan when inlining KASAN_SANITIZE_sev-nmi.o := n +KCSAN_SANITIZE_sev-nmi.o := n diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b6db4e0b936b..7543a8b52c67 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = { */ static u64 snp_tsc_scale __ro_after_init; static u64 snp_tsc_offset __ro_after_init; -static u64 snp_tsc_freq_khz __ro_after_init; +static unsigned long snp_tsc_freq_khz __ro_after_init; DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); @@ -2167,15 +2167,31 @@ static unsigned long securetsc_get_tsc_khz(void) void __init snp_secure_tsc_init(void) { - unsigned long long tsc_freq_mhz; + struct snp_secrets_page *secrets; + unsigned long tsc_freq_mhz; + void *mem; if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return; + mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); + if (!mem) { + pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + secrets = (__force struct snp_secrets_page *)mem; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); - snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + + /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ + tsc_freq_mhz &= GENMASK_ULL(17, 0); + + snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor); x86_platform.calibrate_cpu = securetsc_get_tsc_khz; x86_platform.calibrate_tsc = securetsc_get_tsc_khz; + + early_memunmap(mem, PAGE_SIZE); } diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 175958b02f2b..8e9a0cc20a4a 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -36,20 +36,20 @@ EXPORT_SYMBOL_GPL(write_ibpb); /* * Define the VERW operand that is disguised as entry code so that - * it can be referenced with KPTI enabled. This ensure VERW can be + * it can be referenced with KPTI enabled. This ensures VERW can be * used late in exit-to-user path after page tables are switched. 
*/ .pushsection .entry.text, "ax" .align L1_CACHE_BYTES, 0xcc -SYM_CODE_START_NOALIGN(mds_verw_sel) +SYM_CODE_START_NOALIGN(x86_verw_sel) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc -SYM_CODE_END(mds_verw_sel); +SYM_CODE_END(x86_verw_sel); /* For KVM */ -EXPORT_SYMBOL_GPL(mds_verw_sel); +EXPORT_SYMBOL_GPL(x86_verw_sel); .popsection diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 3d1d3547095a..afdbda2dd7b7 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -34,6 +34,7 @@ #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> #include <linux/highmem.h> +#include <linux/export.h> void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 31f0d29cbc5e..090f5ac9f492 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -10,6 +10,7 @@ #include <linux/pci.h> #include <linux/irq.h> +#include <linux/export.h> #include <asm/mshyperv.h> static int hv_map_interrupt(union hv_device_id device_id, bool level, @@ -46,7 +47,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, if (nr_bank < 0) { local_irq_restore(flags); pr_err("%s: unable to generate VP set\n", __func__); - return EINVAL; + return -EINVAL; } intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; @@ -66,7 +67,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, if (!hv_result_success(status)) hv_status_err(status, "\n"); - return hv_result(status); + return hv_result_to_errno(status); } static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) @@ -88,7 +89,10 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); local_irq_restore(flags); - return hv_result(status); + if (!hv_result_success(status)) + hv_status_err(status, "\n"); + + return hv_result_to_errno(status); } #ifdef CONFIG_PCI_MSI @@ -169,13 +173,34 @@ static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) return dev_id; } -static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector, - struct hv_interrupt_entry *entry) +/** + * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor. + * @data: Describes the IRQ + * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL) + * + * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall. + * + * Return: 0 on success, -errno on failure + */ +int hv_map_msi_interrupt(struct irq_data *data, + struct hv_interrupt_entry *out_entry) { - union hv_device_id device_id = hv_build_pci_dev_id(dev); + struct irq_cfg *cfg = irqd_cfg(data); + struct hv_interrupt_entry dummy; + union hv_device_id device_id; + struct msi_desc *msidesc; + struct pci_dev *dev; + int cpu; - return hv_map_interrupt(device_id, false, cpu, vector, entry); + msidesc = irq_data_get_msi_desc(data); + dev = msi_desc_to_pci_dev(msidesc); + device_id = hv_build_pci_dev_id(dev); + cpu = cpumask_first(irq_data_get_effective_affinity_mask(data)); + + return hv_map_interrupt(device_id, false, cpu, cfg->vector, + out_entry ? 
out_entry : &dummy); } +EXPORT_SYMBOL_GPL(hv_map_msi_interrupt); static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) { @@ -188,13 +213,11 @@ static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { + struct hv_interrupt_entry *stored_entry; + struct irq_cfg *cfg = irqd_cfg(data); struct msi_desc *msidesc; struct pci_dev *dev; - struct hv_interrupt_entry out_entry, *stored_entry; - struct irq_cfg *cfg = irqd_cfg(data); - const cpumask_t *affinity; - int cpu; - u64 status; + int ret; msidesc = irq_data_get_msi_desc(data); dev = msi_desc_to_pci_dev(msidesc); @@ -204,9 +227,6 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - affinity = irq_data_get_effective_affinity_mask(data); - cpu = cpumask_first_and(affinity, cpu_online_mask); - if (data->chip_data) { /* * This interrupt is already mapped. Let's unmap first. @@ -219,14 +239,12 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) stored_entry = data->chip_data; data->chip_data = NULL; - status = hv_unmap_msi_interrupt(dev, stored_entry); + ret = hv_unmap_msi_interrupt(dev, stored_entry); kfree(stored_entry); - if (status != HV_STATUS_SUCCESS) { - hv_status_debug(status, "failed to unmap\n"); + if (ret) return; - } } stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); @@ -235,15 +253,14 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry); - if (status != HV_STATUS_SUCCESS) { + ret = hv_map_msi_interrupt(data, stored_entry); + if (ret) { kfree(stored_entry); return; } - *stored_entry = out_entry; data->chip_data = stored_entry; - entry_to_msi_msg(&out_entry, msg); + entry_to_msi_msg(data->chip_data, msg); return; } @@ -257,7 +274,6 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) { struct hv_interrupt_entry old_entry; struct msi_msg msg; - u64 status; if (!irqd->chip_data) { pr_debug("%s: no chip data\n!", __func__); @@ -270,10 +286,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) kfree(irqd->chip_data); irqd->chip_data = NULL; - status = hv_unmap_msi_interrupt(dev, &old_entry); - - if (status != HV_STATUS_SUCCESS) - hv_status_err(status, "\n"); + (void)hv_unmap_msi_interrupt(dev, &old_entry); } static void hv_msi_free_irq(struct irq_domain *domain, diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index e93a2f488ff7..ade6c665c97e 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/export.h> #include <asm/svm.h> #include <asm/sev.h> #include <asm/io.h> diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c index 1083dc8646f9..8ccbb7c4fc27 100644 --- a/arch/x86/hyperv/nested.c +++ b/arch/x86/hyperv/nested.c @@ -11,6 +11,7 @@ #include <linux/types.h> +#include <linux/export.h> #include <hyperv/hvhdk.h> #include <asm/mshyperv.h> #include <asm/tlbflush.h> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ee176236c2be..286d509f9363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -456,6 +456,7 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No 
Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ @@ -487,6 +488,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +546,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 9a9b21b78905..b30e5474c18e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void) static __always_inline void native_safe_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static __always_inline void native_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 639d9bcee842..f7af967aa16f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -700,8 +700,13 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; - /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). 
+ */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct hv_vp_assist_page vp_assist_page; @@ -764,6 +769,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, + CPUID_8000_0021_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index e1752ba47e67..abc4659f5809 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -112,12 +112,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_status; } -/* Hypercall to the L0 hypervisor */ -static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output) -{ - return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output); -} - /* Fast hypercall with 8 bytes of input and no output */ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) { @@ -165,13 +159,6 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) return _hv_do_fast_hypercall8(control, input1); } -static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1) -{ - u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; - - return _hv_do_fast_hypercall8(control, input1); -} - /* Fast hypercall with 16 bytes of input */ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) { @@ -223,13 +210,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) return _hv_do_fast_hypercall16(control, input1, input2); } -static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2) -{ - u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; - - return _hv_do_fast_hypercall16(control, input1, input2); -} - extern struct hv_vp_assist_page **hv_vp_assist_page; static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) @@ -262,6 +242,8 @@ static inline void hv_apic_init(void) {} struct irq_domain *hv_create_pci_msi_domain(void); +int hv_map_msi_interrupt(struct irq_data *data, + struct hv_interrupt_entry *out_entry); int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, struct hv_interrupt_entry *entry); int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b7dded3c8113..5cfb5d74dd5f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -628,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index dd2b129b0418..6ca6516c7492 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,8 +43,6 @@ static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) static __always_inline void __mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); - /* * Use the instruction mnemonic with implicit operands, as the LLVM * assembler fails to assemble the mnemonic with explicit operands: @@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx) */ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx 
%eax, %ebx, %ecx" */ asm volatile(".byte 0x0f, 0x01, 0xfb" @@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) */ static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } @@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx) */ static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - if (ecx & 1) { - __mwait(eax, ecx); - } else { - __sti_mwait(eax, ecx); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + if (ecx & 1) { + __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); } } + +out: current_clr_polling(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 20d754b98f3f..10f261678749 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -302,25 +302,31 @@ .endm /* - * Macro to execute VERW instruction that mitigate transient data sampling - * attacks such as MDS. On affected systems a microcode update overloaded VERW - * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. - * + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ -.macro CLEAR_CPU_BUFFERS +.macro __CLEAR_CPU_BUFFERS feature #ifdef CONFIG_X86_64 - ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). */ - ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature #endif .endm +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP @@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear); -extern u16 mds_verw_sel; +extern u16 x86_verw_sel; #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. 
*/ -static __always_inline void mds_clear_cpu_buffers(void) +static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ -static __always_inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void x86_idle_clear_cpu_buffers(void) { - if (static_branch_likely(&mds_idle_clear)) - mds_clear_cpu_buffers(); + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); } #endif /* __ASSEMBLER__ */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 58e028d42e41..a631f7d7c0c0 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -223,6 +223,18 @@ struct snp_tsc_info_resp { u8 rsvd2[100]; } __packed; +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + struct snp_guest_req { void *req_buf; size_t req_sz; @@ -282,8 +294,11 @@ struct snp_secrets_page { u8 svsm_guest_vmpl; u8 rsvd3[3]; + /* The percentage decrease from nominal to mean TSC frequency. */ + u32 tsc_factor; + /* Remainder of page */ - u8 rsvd4[3744]; + u8 rsvd4[3740]; } __packed; struct snp_msg_desc { diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index d8525e6ef50a..8bc074c8d7c6 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -72,6 +72,7 @@ #define TDVMCALL_MAP_GPA 0x10001 #define TDVMCALL_GET_QUOTE 0x10002 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL /* * TDG.VP.VMCALL Status Codes (returned in R10) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 655f44f89ded..329ee185d8cc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 
0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + + tsa_init(c); + return; warn: @@ -930,6 +974,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) init_spectral_chicken(c); fix_erratum_1386(c); zen2_zenbleed_check(c); + + /* Disable RDSEED on AMD Cyan Skillfish because of an error. */ + if (c->x86_model == 0x47 && c->x86_stepping == 0x0) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg("RDSEED is not reliable on this platform; disabling.\n"); + } + + /* Correct misconfigured CPUID on some clients. */ + clear_cpu_cap(c, X86_FEATURE_INVLPGB); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f94e6a5497d..f4d3abb12317 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -169,9 +171,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* * Controls whether l1d flush based mitigations are enabled, @@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -637,7 +641,7 @@ static void __init mmio_apply_mitigation(void) * is required irrespective of SMT state. 
*/ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); if (mmio_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -1488,6 +1492,94 @@ static void __init its_apply_mitigation(void) } #undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) { + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + goto out; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) + tsa_mitigation = TSA_MITIGATION_FULL; + + /* + * No need to set verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } +} + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = @@ -2249,10 +2341,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. 
+ */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 27125e009847..fb50c1dd53ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9d852c3b2cb5..5c4eb28c3ac9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) struct thresh_restart { struct threshold_block *b; - int reset; int set_lvt_off; int lvt_off; u16 old_limit; @@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr) rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); + /* + * Reset error count and overflow bit. + * This is done during init or after handling an interrupt. 
+ */ + if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) { + hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI); + hi |= THRESHOLD_MAX - tr->b->threshold_limit; } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); @@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol } bank_type = smca_get_bank_type(cpu, bank); - if (bank_type >= N_SMCA_BANK_TYPES) - return NULL; if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; - return NULL; + } + + if (b && b->block) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block); + return buf_mcatype; + } + + if (bank_type >= N_SMCA_BANK_TYPES) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank); + return buf_mcatype; } if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e9b3c5d4a52e..4da4eab56c81 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void) void (*mc_poll_banks)(void) = mc_poll_banks_default; +static bool should_enable_timer(unsigned long iv) +{ + return !mca_cfg.ignore_ce && iv; +} + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t) if (mce_get_storm_mode()) { __start_timer(t, HZ); - } else { + } else if (should_enable_timer(iv)) { __this_cpu_write(mce_next_interval, iv); __start_timer(t, iv); } @@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; - if (mca_cfg.ignore_ce || !iv) - return; - - this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (should_enable_timer(iv)) { + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } static void __mcheck_cpu_setup_timer(void) @@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { struct timer_list *t = this_cpu_ptr(&mce_timer); - int ret; mce_device_create(cpu); - - ret = mce_threshold_create_device(cpu); - if (ret) { - mce_device_remove(cpu); - return ret; - } + mce_threshold_create_device(cpu); mce_reenable_cpu(); mce_start_timer(t); return 0; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index efcf21e9552e..9b149b9c4109 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_clear(struct cpuinfo_x86 *c) { intel_clear_lmce(); + cmci_clear(); } bool intel_filter_mce(struct mce *m) diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c index 2a1655b1fdd8..1fd349cfc802 100644 --- a/arch/x86/kernel/cpu/microcode/amd_shas.c +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = { 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, } }, + { 0xa0011d7, { + 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b, + 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12, + 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2, + 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36, + } + }, { 0xa001223, { 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, @@ -294,6 +301,13 @@ static const struct patch_digest 
phashes[] = { 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, } }, + { 0xa00123b, { + 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2, + 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3, + 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28, + 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72, + } + }, { 0xa00820c, { 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, @@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = { 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, } }, + { 0xa00820d, { + 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4, + 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca, + 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1, + 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7, + } + }, { 0xa10113e, { 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, @@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = { 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, } }, + { 0xa10114c, { + 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64, + 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74, + 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a, + 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0, + } + }, { 0xa10123e, { 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, @@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = { 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, } }, + { 0xa10124c, { + 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90, + 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46, + 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc, + 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2, + } + }, { 0xa108108, { 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, @@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = { 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, } }, + { 0xa108109, { + 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa, + 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b, + 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35, + 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59, + } + }, { 0xa20102d, { 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, @@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = { 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, } }, + { 0xa20102e, { + 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd, + 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6, + 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5, + 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe, + } + }, { 0xa201210, { 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, @@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = { 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, } }, + { 0xa201211, { + 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95, + 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27, + 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff, + 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27, + } + }, { 0xa404107, { 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, @@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = { 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, } }, + { 0xa404108, { + 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc, + 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb, + 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19, + 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00, + } + }, { 0xa500011, { 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, @@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = { 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, } }, + { 0xa500012, { + 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4, + 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16, + 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f, + 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63, + } + }, { 0xa601209, { 
0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, @@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = { 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, } }, + { 0xa60120a, { + 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d, + 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b, + 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d, + 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c, + } + }, { 0xa704107, { 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, @@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = { 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, } }, + { 0xa704108, { + 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93, + 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98, + 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49, + 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a, + } + }, { 0xa705206, { 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, @@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = { 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, } }, + { 0xa705208, { + 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19, + 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2, + 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11, + 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff, + } + }, { 0xa708007, { 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, @@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = { 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, } }, + { 0xa708008, { + 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46, + 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab, + 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16, + 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b, + } + }, { 0xa70c005, { 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, @@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = { 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, } }, + { 0xa70c008, { + 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21, + 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e, + 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66, + 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0, + } + }, { 0xaa00116, { 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, @@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = { 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, } }, + { 0xaa00216, { + 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5, + 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08, + 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2, + 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1, + } + }, }; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 704883c21f3a..a838be04f874 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -907,16 +907,24 @@ static __init bool prefer_mwait_c1_over_halt(void) */ static __cpuidle void mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { const 
void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - __sti_mwait(0, 0); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + __sti_mwait(0, 0); + raw_local_irq_disable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b2d006756e02..f84bc0569c9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1165,6 +1165,8 @@ void kvm_set_cpu_caps(void) */ SYNTHESIZED_F(LFENCE_RDTSC), /* SmmPgCfgLock */ + /* 4: Resv */ + SYNTHESIZED_F(VERW_CLEAR), F(NULL_SEL_CLR_BASE), /* UpperAddressIgnore */ F(AUTOIBRS), @@ -1179,6 +1181,11 @@ void kvm_set_cpu_caps(void) F(SRSO_USER_KERNEL_NO), ); + kvm_cpu_cap_init(CPUID_8000_0021_ECX, + SYNTHESIZED_F(TSA_SQ_NO), + SYNTHESIZED_F(TSA_L1_NO), + ); + kvm_cpu_cap_init(CPUID_8000_0022_EAX, F(PERFMON_V2), ); @@ -1748,8 +1755,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; case 0x80000021: - entry->ebx = entry->ecx = entry->edx = 0; + entry->ebx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); + cpuid_entry_override(entry, CPUID_8000_0021_ECX); break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 24f0318c50d7..ee27064dd72f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_invlpg_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. 
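The kvm_hv_vcpu_flush_tlb() hunk above relies on the flush-entry format described in the retained comment: a page-aligned GVA with the number of additional pages to flush packed into the low 12 bits. A minimal standalone sketch of that decoding (the macro names, helper layout and example value are illustrative, not taken from the kernel sources):

#include <stdint.h>
#include <stdio.h>

#define GVA_PAGE_SHIFT	12
#define GVA_PAGE_MASK	((1ULL << GVA_PAGE_SHIFT) - 1)

int main(void)
{
	/* Example entry: base GVA 0x7f1234561000, low 12 bits = 3 */
	uint64_t entry = 0x00007f1234561003ULL;
	uint64_t gva   = entry & ~GVA_PAGE_MASK;
	uint64_t nr    = (entry & GVA_PAGE_MASK) + 1;	/* base page + 3 additional */
	uint64_t i;

	/* Flushes 0x7f1234561000 .. 0x7f1234564000, four pages in total */
	for (i = 0; i < nr; i++)
		printf("flush GVA 0x%llx\n",
		       (unsigned long long)(gva + (i << GVA_PAGE_SHIFT)));
	return 0;
}

Entries equal to KVM_HV_TLB_FLUSHALL_ENTRY still fall back to a full flush, while the newly added check simply skips non-canonical addresses instead of feeding them into this per-page path.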
@@ -2001,11 +2004,11 @@ out_flush_all: static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + unsigned long *vcpu_mask = hv_vcpu->vcpu_mask; u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; /* * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index fde0ae986003..c53b92379e6e 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,6 +52,10 @@ /* CPUID level 0x80000022 (EAX) */ #define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0) +/* CPUID level 0x80000021 (ECX) */ +#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1) +#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2) + struct cpuid_reg { u32 function; u32 index; @@ -82,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, + [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; /* @@ -121,6 +126,8 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); + KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 459c3b791fd4..b201f77fcd49 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; @@ -4445,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) * the VMSA will be NULL if this vCPU is the destination for intrahost * migration, and will be copied later. 
*/ - if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa) - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (!svm->sev_es.snp_has_guest_vmsa) { + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + else + svm->vmcb->control.vmsa_pa = INVALID_PAGE; + } if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) svm->vmcb->control.allowed_sev_features = sev->vmsa_features | diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0c61153b275f..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: @@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax 2: diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 1ad20c273f3b..ec79aacc446f 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -173,6 +173,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i tdx_clear_unsupported_cpuid(entry); } +#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1) + static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, struct kvm_tdx_capabilities *caps) { @@ -188,6 +190,9 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, caps->cpuid.nent = td_conf->num_cpuid_config; + caps->user_tdvmcallinfo_1_r11 = + TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT; + for (i = 0; i < td_conf->num_cpuid_config; i++) td_init_cpuid_entry2(&caps->cpuid.entries[i], i); @@ -1530,6 +1535,27 @@ static int tdx_get_quote(struct kvm_vcpu *vcpu) return 0; } +static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 vector = tdx->vp_enter_args.r12; + + if (vector < 32 || vector > 255) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT; + vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.setup_event_notify.vector = vector; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1541,6 +1567,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_get_td_vm_call_info(vcpu); case TDVMCALL_GET_QUOTE: return tdx_get_quote(vcpu); + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + return tdx_setup_event_notify_interrupt(vcpu); default: break; } @@ -2241,25 +2269,26 @@ static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd) const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf; struct kvm_tdx_capabilities __user *user_caps; struct kvm_tdx_capabilities *caps = NULL; + u32 nr_user_entries; int ret = 0; /* flags is reserved for future use */ if (cmd->flags) return -EINVAL; - caps = kmalloc(sizeof(*caps) + + caps = kzalloc(sizeof(*caps) + sizeof(struct kvm_cpuid_entry2) * td_conf->num_cpuid_config, GFP_KERNEL); if (!caps) return -ENOMEM; user_caps = u64_to_user_ptr(cmd->data); - if (copy_from_user(caps, user_caps, sizeof(*caps))) { + if (get_user(nr_user_entries, &user_caps->cpuid.nent)) { ret = -EFAULT; goto out; } - if (caps->cpuid.nent < 
td_conf->num_cpuid_config) { + if (nr_user_entries < td_conf->num_cpuid_config) { ret = -E2BIG; goto out; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4953846cb30d..191a9ed0da22 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7291,7 +7291,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&cpu_buf_vm_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) - mds_clear_cpu_buffers(); + x86_clear_cpu_buffers(); vmx_disable_fb_clear(vmx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a9d992d5652f..93636f77c42d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ - if (kvm_caps.has_tsc_control) + if (kvm_caps.has_tsc_control) { tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz, v->arch.l1_tsc_scaling_ratio); + tgt_tsc_khz = tgt_tsc_khz ? : 1; + } if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, @@ -6186,6 +6188,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, u32 user_tsc_khz; r = -EINVAL; + + if (vcpu->arch.guest_tsc_protected) + goto out; + user_tsc_khz = (u32)arg; if (kvm_caps.has_tsc_control && diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9b029bb29a16..5fa2cca43653 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1971,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) - return -EINVAL; + /* + * Don't check for the port being within range of max_evtchn_port(). + * Userspace can configure what ever targets it likes; events just won't + * be delivered if/while the target is invalid, just like userspace can + * configure MSIs which target non-existent APICs. + * + * This allow on Live Migration and Live Update, the IRQ routing table + * can be restored *independently* of other things like creating vCPUs, + * without imposing an ordering dependency on userspace. In this + * particular case, the problematic ordering would be with setting the + * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096 + * instead of 1024 event channels. + */ /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) |
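The closing kvm_xen_setup_evtchn() comment refers to max_evtchn_port() allowing 4096 ports once the guest sets the Xen 'long mode' flag, versus 1024 otherwise. With the 2-level event channel ABI that limit is just the guest word size squared; a small illustrative sketch (hypothetical helper, not the kernel's implementation):

#include <stdio.h>

/*
 * 2-level ABI: a guest-word-sized bitmap of guest words, each word a
 * bitmap of ports, so the port limit is (bits per guest word) squared.
 */
static unsigned int evtchn_2l_max_ports(int long_mode)
{
	unsigned int bits = long_mode ? 64 : 32;	/* width of guest xen_ulong_t */

	return bits * bits;
}

int main(void)
{
	printf("32-bit guest: %u ports\n", evtchn_2l_max_ports(0));	/* 1024 */
	printf("64-bit guest: %u ports\n", evtchn_2l_max_ports(1));	/* 4096 */
	return 0;
}

Dropping the range check at routing-setup time therefore lets userspace restore the routing table before the long-mode flag is known; out-of-range ports are simply never delivered.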