diff options
Diffstat (limited to 'arch')
216 files changed, 1482 insertions, 2408 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 072a1b39e3af..2d0e7099eb3f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -468,6 +468,9 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM config ARCH_HAVE_NMI_SAFE_CMPXCHG bool +config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS + bool + config HAVE_ALIGNED_STRUCT_PAGE bool help diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi index dae448040a97..947497413977 100644 --- a/arch/arm/boot/dts/am335x-pcm-953.dtsi +++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi @@ -12,22 +12,20 @@ compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"; /* Power */ - regulators { - vcc3v3: fixedregulator@1 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-boot-on; - }; + vcc3v3: fixedregulator1 { + compatible = "regulator-fixed"; + regulator-name = "vcc3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + }; - vcc1v8: fixedregulator@2 { - compatible = "regulator-fixed"; - regulator-name = "vcc1v8"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-boot-on; - }; + vcc1v8: fixedregulator2 { + compatible = "regulator-fixed"; + regulator-name = "vcc1v8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; }; /* User IO */ diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index 7a113325abb9..6f9004ebf424 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -666,7 +666,7 @@ compatible = "atmel,at91rm9200-udc"; reg = <0xfffb0000 0x4000>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>; clock-names = "pclk", "hclk"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 60d61291f344..024af2db638e 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -39,6 +39,13 @@ }; + usb1 { + pinctrl_usb1_vbus_gpio: usb1_vbus_gpio { + atmel,pins = + <AT91_PIOC 5 AT91_PERIPH_GPIO AT91_PINCTRL_DEGLITCH>; /* PC5 GPIO */ + }; + }; + mmc0_slot1 { pinctrl_board_mmc0_slot1: mmc0_slot1-board { atmel,pins = @@ -84,6 +91,8 @@ }; usb1: gadget@fffa4000 { + pinctrl-0 = <&pinctrl_usb1_vbus_gpio>; + pinctrl-names = "default"; atmel,vbus-gpio = <&pioC 5 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index b4605edfd2ab..d8fa83effd63 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -364,8 +364,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_wifi>; interrupts-extended = <&gpio1 30 IRQ_TYPE_LEVEL_HIGH>; - ref-clock-frequency = "38400000"; - tcxo-clock-frequency = "19200000"; + ref-clock-frequency = <38400000>; + tcxo-clock-frequency = <19200000>; }; }; diff --git a/arch/arm/boot/dts/lan966x-pcb8291.dts b/arch/arm/boot/dts/lan966x-pcb8291.dts index f4f054cdf2a8..3a3d76af8612 100644 --- a/arch/arm/boot/dts/lan966x-pcb8291.dts +++ b/arch/arm/boot/dts/lan966x-pcb8291.dts @@ -69,6 +69,12 @@ pins = "GPIO_35", "GPIO_36"; function = "can0_b"; }; + + sgpio_a_pins: sgpio-a-pins { + /* SCK, D0, D1, LD */ + pins = "GPIO_32", "GPIO_33", "GPIO_34", "GPIO_35"; + function = "sgpio_a"; + }; }; &can0 { @@ -118,6 +124,20 @@ status = "okay"; }; +&sgpio { + pinctrl-0 = <&sgpio_a_pins>; + pinctrl-names = "default"; + microchip,sgpio-port-ranges = <0 3>, <8 11>; + status = "okay"; + + gpio@0 { + ngpios = <64>; + }; + gpio@1 { + ngpios = <64>; + }; +}; + &switch { status = "okay"; }; diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts index 9fd4d9db9f8f..becdc0b664bf 100644 --- a/arch/arm/boot/dts/rk3036-evb.dts +++ b/arch/arm/boot/dts/rk3036-evb.dts @@ -35,11 +35,10 @@ &i2c1 { status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; }; diff --git a/arch/arm/boot/dts/rk3066a-mk808.dts b/arch/arm/boot/dts/rk3066a-mk808.dts index cfa318a506eb..2db5ba706208 100644 --- a/arch/arm/boot/dts/rk3066a-mk808.dts +++ b/arch/arm/boot/dts/rk3066a-mk808.dts @@ -32,7 +32,7 @@ keyup-threshold-microvolt = <2500000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <0>; diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index e7cf18823558..118deacd38c4 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -71,7 +71,7 @@ #sound-dai-cells = <0>; }; - ir_recv: gpio-ir-receiver { + ir_recv: ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index cdd4a0bd5133..44b54af0bbf9 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -379,7 +379,7 @@ rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>; }; - lcdc1_rgb24: ldcd1-rgb24 { + lcdc1_rgb24: lcdc1-rgb24 { rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, <2 RK_PA1 1 &pcfg_pull_none>, <2 RK_PA2 1 &pcfg_pull_none>, @@ -607,7 +607,6 @@ &global_timer { interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; - status = "disabled"; }; &local_timer { diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts index be695b8c1f67..8a635c243127 100644 --- a/arch/arm/boot/dts/rk3288-evb-act8846.dts +++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts @@ -54,7 +54,7 @@ vin-supply = <&vcc_sys>; }; - hym8563@51 { + rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi index 399d6b9c5fd4..382d2839cf47 100644 --- a/arch/arm/boot/dts/rk3288-evb.dtsi +++ b/arch/arm/boot/dts/rk3288-evb.dtsi @@ -28,19 +28,19 @@ press-threshold-microvolt = <300000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <640000>; }; - esc { + button-esc { label = "Esc"; linux,code = <KEY_ESC>; press-threshold-microvolt = <1000000>; }; - home { + button-home { label = "Home"; linux,code = <KEY_HOME>; press-threshold-microvolt = <1300000>; diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi index 052afe5543e2..3836c61cfb76 100644 --- a/arch/arm/boot/dts/rk3288-firefly.dtsi +++ b/arch/arm/boot/dts/rk3288-firefly.dtsi @@ -233,11 +233,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio7>; interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts index 713f55e143c6..db1eb648e0e1 100644 --- a/arch/arm/boot/dts/rk3288-miqi.dts +++ b/arch/arm/boot/dts/rk3288-miqi.dts @@ -162,11 +162,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts index 80e0f07c8e87..13cfdaa95cc7 100644 --- a/arch/arm/boot/dts/rk3288-rock2-square.dts +++ b/arch/arm/boot/dts/rk3288-rock2-square.dts @@ -165,11 +165,10 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio0>; interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; diff --git a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi index 0ae2bd150e37..793951655b73 100644 --- a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi +++ b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi @@ -241,7 +241,6 @@ interrupt-parent = <&gpio5>; interrupts = <RK_PC3 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index bf285091a9eb..cb4e42ede56a 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -76,6 +76,13 @@ reg = <0x1013c200 0x20>; interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>; clocks = <&cru CORE_PERI>; + status = "disabled"; + /* The clock source and the sched_clock provided by the arm_global_timer + * on Rockchip rk3066a/rk3188 are quite unstable because their rates + * depend on the CPU frequency. + * Keep the arm_global_timer disabled in order to have the + * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. + */ }; local_timer: local-timer@1013c600 { diff --git a/arch/arm/boot/dts/sama7g5-pinfunc.h b/arch/arm/boot/dts/sama7g5-pinfunc.h index 4eb30445d205..6e87f0d4b8fc 100644 --- a/arch/arm/boot/dts/sama7g5-pinfunc.h +++ b/arch/arm/boot/dts/sama7g5-pinfunc.h @@ -261,7 +261,7 @@ #define PIN_PB2__FLEXCOM6_IO0 PINMUX_PIN(PIN_PB2, 2, 1) #define PIN_PB2__ADTRG PINMUX_PIN(PIN_PB2, 3, 1) #define PIN_PB2__A20 PINMUX_PIN(PIN_PB2, 4, 1) -#define PIN_PB2__FLEXCOM11_IO0 PINMUX_PIN(PIN_PB2, 6, 3) +#define PIN_PB2__FLEXCOM11_IO1 PINMUX_PIN(PIN_PB2, 6, 3) #define PIN_PB3 35 #define PIN_PB3__GPIO PINMUX_PIN(PIN_PB3, 0, 0) #define PIN_PB3__RF1 PINMUX_PIN(PIN_PB3, 1, 1) diff --git a/arch/arm/configs/clps711x_defconfig b/arch/arm/configs/clps711x_defconfig index 92481b2a88fa..adcee238822a 100644 --- a/arch/arm/configs/clps711x_defconfig +++ b/arch/arm/configs/clps711x_defconfig @@ -14,7 +14,8 @@ CONFIG_ARCH_EDB7211=y CONFIG_ARCH_P720T=y CONFIG_AEABI=y # CONFIG_COREDUMP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 2a2d2cb3ce2e..69341c33e0cc 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -13,7 +13,8 @@ CONFIG_CMDLINE="noinitrd root=/dev/mtdblock2 rootfstype=jffs2 fbcon=rotate:1" CONFIG_FPE_NWFPE=y CONFIG_PM=y # CONFIG_SWAP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig index e2fd822f741a..b60000a89aff 100644 --- a/arch/arm/configs/multi_v4t_defconfig +++ b/arch/arm/configs/multi_v4t_defconfig @@ -25,7 +25,8 @@ CONFIG_ARM_CLPS711X_CPUIDLE=y CONFIG_JUMP_LABEL=y CONFIG_PARTITION_ADVANCED=y # CONFIG_COREDUMP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 70511fe4b3ec..246f1bba7df5 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -42,7 +42,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y CONFIG_BINFMT_MISC=y # CONFIG_SWAP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index d60cc9cc4c21..0a0f12df40b5 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -49,7 +49,8 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_LDM_PARTITION=y CONFIG_CMDLINE_PARTITION=y CONFIG_BINFMT_MISC=y -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arm/configs/tct_hammer_defconfig b/arch/arm/configs/tct_hammer_defconfig index 3b29ae1fb750..6bd38b6f22c4 100644 --- a/arch/arm/configs/tct_hammer_defconfig +++ b/arch/arm/configs/tct_hammer_defconfig @@ -19,7 +19,8 @@ CONFIG_FPE_NWFPE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_SWAP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig index ea59e4b6bfc5..6bd9f71b71fc 100644 --- a/arch/arm/configs/xcep_defconfig +++ b/arch/arm/configs/xcep_defconfig @@ -26,7 +26,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLOCK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_COMPAT_BRK is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_NET=y diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index fe87397c3d8c..bdbc1e590891 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ARM_pc = (__ip); \ - (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ + frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ (regs)->ARM_sp = current_stack_pointer; \ (regs)->ARM_cpsr = SVC_MODE; \ } diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index d16aba48fa0a..090011394477 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -45,12 +45,6 @@ typedef pte_t *pte_addr_t; /* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) - -/* * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) (prot) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 78a532068fec..ef48a55e9af8 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -10,6 +10,15 @@ #include <linux/const.h> #include <asm/proc-fns.h> +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) +#endif + #ifndef CONFIG_MMU #include <asm-generic/pgtable-nopud.h> @@ -139,13 +148,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, */ #ifndef __ASSEMBLY__ -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) - extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index ffed4d949042..e4904faf1753 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -169,10 +169,15 @@ sr_ena_2: cmp tmp1, #UDDRC_STAT_SELFREF_TYPE_SW bne sr_ena_2 - /* Put DDR PHY's DLL in bypass mode for non-backup modes. */ + /* Disable DX DLLs for non-backup modes. */ cmp r7, #AT91_PM_BACKUP beq sr_ena_3 + /* Do not soft reset the AC DLL. */ + ldr tmp1, [r3, DDR3PHY_ACDLLCR] + bic tmp1, tmp1, DDR3PHY_ACDLLCR_DLLSRST + str tmp1, [r3, DDR3PHY_ACDLLCR] + /* Disable DX DLLs. */ ldr tmp1, [r3, #DDR3PHY_DX0DLLCR] orr tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c index 67ed68fbe3a5..bf2b5c6a18c6 100644 --- a/arch/arm/mach-at91/sama5.c +++ b/arch/arm/mach-at91/sama5.c @@ -26,7 +26,7 @@ static void sama5_l2c310_write_sec(unsigned long val, unsigned reg) static void __init sama5_secure_cache_init(void) { sam_secure_init(); - if (sam_linux_is_optee_available()) + if (IS_ENABLED(CONFIG_OUTER_CACHE) && sam_linux_is_optee_available()) outer_cache.write_sec = sama5_l2c310_write_sec; } diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 25c9d184fa4c..1c57ac401649 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -393,8 +393,10 @@ static void __init mxs_machine_init(void) root = of_find_node_by_path("/"); ret = of_property_read_string(root, "model", &soc_dev_attr->machine); - if (ret) + if (ret) { + kfree(soc_dev_attr); return; + } soc_dev_attr->family = "Freescale MXS Family"; soc_dev_attr->soc_id = mxs_get_soc_id(); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 46cccd6bf705..de988cba9a4b 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -105,6 +105,19 @@ static inline bool is_write_fault(unsigned int fsr) return (fsr & FSR_WRITE) && !(fsr & FSR_CM); } +static inline bool is_translation_fault(unsigned int fsr) +{ + int fs = fsr_fs(fsr); +#ifdef CONFIG_ARM_LPAE + if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL) + return true; +#else + if (fs == FS_L1_TRANS || fs == FS_L2_TRANS) + return true; +#endif + return false; +} + static void die_kernel_fault(const char *msg, struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -140,7 +153,8 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; } else { - if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) + if (is_translation_fault(fsr) && + kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) return; msg = "paging request"; @@ -208,7 +222,7 @@ static inline bool is_permission_fault(unsigned int fsr) { int fs = fsr_fs(fsr); #ifdef CONFIG_ARM_LPAE - if ((fs & FS_PERM_NOLL_MASK) == FS_PERM_NOLL) + if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) return true; #else if (fs == FS_L1_PERM || fs == FS_L2_PERM) diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index 83b5ab32d7a4..54927ba1fa6e 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -14,8 +14,9 @@ #ifdef CONFIG_ARM_LPAE #define FSR_FS_AEA 17 +#define FS_TRANS_NOLL 0x4 #define FS_PERM_NOLL 0xC -#define FS_PERM_NOLL_MASK 0x3C +#define FS_MMU_NOLL_MASK 0x3C static inline int fsr_fs(unsigned int fsr) { @@ -23,8 +24,10 @@ static inline int fsr_fs(unsigned int fsr) } #else #define FSR_FS_AEA 22 -#define FS_L1_PERM 0xD -#define FS_L2_PERM 0xF +#define FS_L1_TRANS 0x5 +#define FS_L2_TRANS 0x7 +#define FS_L1_PERM 0xD +#define FS_L2_PERM 0xF static inline int fsr_fs(unsigned int fsr) { diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index c42debaded95..c1494a4dee25 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -26,6 +26,13 @@ unsigned long vectors_base; +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; #endif @@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void) */ void __init paging_init(const struct machine_desc *mdesc) { + void *zero_page; + early_trap_init((void *)vectors_base); mpu_setup(); + + /* allocate the zero page. */ + zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!zero_page) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); } /* diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c6d15dd6cae3..7fc3457a8891 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -32,6 +32,7 @@ config ARM64 select ARCH_HAS_KCOV select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 53f6660656ac..ca1d287a0a01 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -161,6 +161,7 @@ clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; video-codec@1c0e000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts index 7e0aeb2db305..a0aeac619929 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts @@ -34,11 +34,25 @@ off-on-delay-us = <12000>; }; - extcon_usbotg1: extcon-usbotg1 { - compatible = "linux,extcon-usb-gpio"; + connector { + compatible = "gpio-usb-b-connector", "usb-b-connector"; + type = "micro"; + label = "X19"; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usb1_extcon>; - id-gpio = <&gpio1 10 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&pinctrl_usb1_connector>; + id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + usb_dr_connector: endpoint { + remote-endpoint = <&usb1_drd_sw>; + }; + }; + }; }; }; @@ -105,13 +119,19 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usbotg1>; dr_mode = "otg"; - extcon = <&extcon_usbotg1>; srp-disable; hnp-disable; adp-disable; power-active-high; over-current-active-low; + usb-role-switch; status = "okay"; + + port { + usb1_drd_sw: endpoint { + remote-endpoint = <&usb_dr_connector>; + }; + }; }; &usbotg2 { @@ -231,7 +251,7 @@ <MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x84>; }; - pinctrl_usb1_extcon: usb1-extcongrp { + pinctrl_usb1_connector: usb1-connectorgrp { fsl,pins = <MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x1c0>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index dabd94dc30c4..50ef92915c67 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -1244,10 +1244,10 @@ clocks = <&clk IMX8MM_CLK_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000{ + gpmi: nand-controller@33002000 { compatible = "fsl,imx8mm-gpmi-nand", "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index ad0b99adf691..67b554ba690c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -1102,7 +1102,7 @@ gpmi: nand-controller@33002000 { compatible = "fsl,imx8mn-gpmi-nand", "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 9f1469db554d..b4c1ef2559f2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -544,14 +544,14 @@ pinctrl_pcie0: pcie0grp { fsl,pins = < - MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x61 /* open drain, pull up */ - MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x41 + MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x60 /* open drain, pull up */ + MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x40 >; }; pinctrl_pcie0_reg: pcie0reggrp { fsl,pins = < - MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x41 + MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x40 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx93-pinfunc.h b/arch/arm64/boot/dts/freescale/imx93-pinfunc.h index 4298a145f8a9..4298a145f8a9 100755..100644 --- a/arch/arm64/boot/dts/freescale/imx93-pinfunc.h +++ b/arch/arm64/boot/dts/freescale/imx93-pinfunc.h diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index a47acf9bdf24..a721cdd80489 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -668,7 +668,7 @@ apcs_glb: mailbox@b111000 { compatible = "qcom,ipq8074-apcs-apps-global"; - reg = <0x0b111000 0x6000>; + reg = <0x0b111000 0x1000>; #clock-cells = <1>; #mbox-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index c0a2baffa49d..aba717644391 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -3504,7 +3504,7 @@ }; saw3: syscon@9a10000 { - compatible = "qcom,tcsr-msm8996", "syscon"; + compatible = "syscon"; reg = <0x09a10000 0x1000>; }; diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts index 87ab0e1ecd16..4dee790f1049 100644 --- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts @@ -43,7 +43,6 @@ regulator-always-on; regulator-boot-on; - regulator-allow-set-load; vin-supply = <&vreg_3p3>; }; @@ -137,6 +136,9 @@ regulator-max-microvolt = <880000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7a_1p8: ldo7 { @@ -152,6 +154,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l11a_0p8: ldo11 { @@ -258,6 +263,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7c_1p8: ldo7 { @@ -273,6 +281,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l10c_3p3: ldo10 { diff --git a/arch/arm64/boot/dts/qcom/sa8295p-adp.dts b/arch/arm64/boot/dts/qcom/sa8295p-adp.dts index b608b82dff03..2c62ba6a49c5 100644 --- a/arch/arm64/boot/dts/qcom/sa8295p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8295p-adp.dts @@ -83,6 +83,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l4c: ldo4 { @@ -98,6 +101,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7c: ldo7 { @@ -113,6 +119,9 @@ regulator-max-microvolt = <2504000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l17c: ldo17 { @@ -121,6 +130,9 @@ regulator-max-microvolt = <2504000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 212580316d3e..4cdc88d33944 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -2296,7 +2296,8 @@ lpass_audiocc: clock-controller@3300000 { compatible = "qcom,sc7280-lpassaudiocc"; - reg = <0 0x03300000 0 0x30000>; + reg = <0 0x03300000 0 0x30000>, + <0 0x032a9000 0 0x1000>; clocks = <&rpmhcc RPMH_CXO_CLK>, <&lpass_aon LPASS_AON_CC_MAIN_RCG_CLK_SRC>; clock-names = "bi_tcxo", "lpass_aon_cc_main_rcg_clk_src"; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index fea7d8273ccd..5e30349efd20 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -124,6 +124,9 @@ regulator-max-microvolt = <2504000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l13c: ldo13 { @@ -146,6 +149,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l4d: ldo4 { diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index c32bcded2aef..212d63d5cbf2 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -885,13 +885,13 @@ ufs_mem_phy: phy@1d87000 { compatible = "qcom,sc8280xp-qmp-ufs-phy"; - reg = <0 0x01d87000 0 0xe10>; + reg = <0 0x01d87000 0 0x1c8>; #address-cells = <2>; #size-cells = <2>; ranges; clock-names = "ref", "ref_aux"; - clocks = <&rpmhcc RPMH_CXO_CLK>, + clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>; resets = <&ufs_mem_hc 0>; @@ -953,13 +953,13 @@ ufs_card_phy: phy@1da7000 { compatible = "qcom,sc8280xp-qmp-ufs-phy"; - reg = <0 0x01da7000 0 0xe10>; + reg = <0 0x01da7000 0 0x1c8>; #address-cells = <2>; #size-cells = <2>; ranges; clock-names = "ref", "ref_aux"; - clocks = <&gcc GCC_UFS_1_CARD_CLKREF_CLK>, + clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_CARD_PHY_AUX_CLK>; resets = <&ufs_card_hc 0>; @@ -1181,26 +1181,16 @@ usb_0_ssphy: usb3-phy@88eb400 { reg = <0 0x088eb400 0 0x100>, <0 0x088eb600 0 0x3ec>, - <0 0x088ec400 0 0x1f0>, + <0 0x088ec400 0 0x364>, <0 0x088eba00 0 0x100>, <0 0x088ebc00 0 0x3ec>, - <0 0x088ec700 0 0x64>; + <0 0x088ec200 0 0x18>; #phy-cells = <0>; #clock-cells = <0>; clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; clock-names = "pipe0"; clock-output-names = "usb0_phy_pipe_clk_src"; }; - - usb_0_dpphy: dp-phy@88ed200 { - reg = <0 0x088ed200 0 0x200>, - <0 0x088ed400 0 0x200>, - <0 0x088eda00 0 0x200>, - <0 0x088ea600 0 0x200>, - <0 0x088ea800 0 0x200>; - #clock-cells = <1>; - #phy-cells = <0>; - }; }; usb_1_hsphy: phy@8902000 { @@ -1242,8 +1232,8 @@ usb_1_ssphy: usb3-phy@8903400 { reg = <0 0x08903400 0 0x100>, - <0 0x08903c00 0 0x3ec>, - <0 0x08904400 0 0x1f0>, + <0 0x08903600 0 0x3ec>, + <0 0x08904400 0 0x364>, <0 0x08903a00 0 0x100>, <0 0x08903c00 0 0x3ec>, <0 0x08904200 0 0x18>; @@ -1253,16 +1243,6 @@ clock-names = "pipe0"; clock-output-names = "usb1_phy_pipe_clk_src"; }; - - usb_1_dpphy: dp-phy@8904200 { - reg = <0 0x08904200 0 0x200>, - <0 0x08904400 0 0x200>, - <0 0x08904a00 0 0x200>, - <0 0x08904600 0 0x200>, - <0 0x08904800 0 0x200>; - #clock-cells = <1>; - #phy-cells = <0>; - }; }; system-cache-controller@9200000 { diff --git a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi index 014fe3a31548..fb6e5a140c9f 100644 --- a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi @@ -348,6 +348,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7c_3p0: ldo7 { @@ -367,6 +370,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l10c_3p3: ldo10 { diff --git a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi index 549e0a2aa9fe..5428aab3058d 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi @@ -317,6 +317,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7c_2p85: ldo7 { @@ -339,6 +342,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l10c_3p3: ldo10 { diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index a5b62cadb129..e276eed1f8e2 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -334,6 +334,7 @@ exit-latency-us = <6562>; min-residency-us = <9987>; local-timer-stop; + status = "disabled"; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts index 0fcf5bd88fc7..69ae6503c2f6 100644 --- a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts +++ b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts @@ -107,6 +107,9 @@ regulator-max-microvolt = <888000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l6b_1p2: ldo6 { @@ -115,6 +118,9 @@ regulator-max-microvolt = <1208000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7b_2p96: ldo7 { @@ -123,6 +129,9 @@ regulator-max-microvolt = <2504000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l9b_1p2: ldo9 { @@ -131,6 +140,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-evb.dts b/arch/arm64/boot/dts/rockchip/px30-evb.dts index 07008d84434c..c1bbd555f5f5 100644 --- a/arch/arm64/boot/dts/rockchip/px30-evb.dts +++ b/arch/arm64/boot/dts/rockchip/px30-evb.dts @@ -30,31 +30,31 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - esc-key { + button-esc { label = "esc"; linux,code = <KEY_ESC>; press-threshold-microvolt = <1310000>; }; - home-key { + button-home { label = "home"; linux,code = <KEY_HOME>; press-threshold-microvolt = <624000>; }; - menu-key { + button-menu { label = "menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <987000>; }; - vol-down-key { + button-down { label = "volume down"; linux,code = <KEY_VOLUMEDOWN>; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { label = "volume up"; linux,code = <KEY_VOLUMEUP>; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts index 9fe9b0d11003..184b84fdde07 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts @@ -23,7 +23,7 @@ poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - func-key { + button-func { linux,code = <KEY_FN>; label = "function"; press-threshold-microvolt = <18000>; @@ -37,31 +37,31 @@ poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - esc-key { + button-esc { linux,code = <KEY_MICMUTE>; label = "micmute"; press-threshold-microvolt = <1130000>; }; - home-key { + button-home { linux,code = <KEY_MODE>; label = "mode"; press-threshold-microvolt = <901000>; }; - menu-key { + button-menu { linux,code = <KEY_PLAY>; label = "play"; press-threshold-microvolt = <624000>; }; - vol-down-key { + button-down { linux,code = <KEY_VOLUMEDOWN>; label = "volume down"; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { linux,code = <KEY_VOLUMEUP>; label = "volume up"; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index ea6820902ede..7ea48167747c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -19,7 +19,7 @@ stdout-path = "serial2:1500000n8"; }; - ir_rx { + ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts index 43c928ac98f0..1deef53a4c94 100644 --- a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts @@ -25,7 +25,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 7f5bba0c6001..81d1064fdb21 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -208,11 +208,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts index 38d757c00548..5589f3db6b36 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts @@ -192,11 +192,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index ed3348b558f8..a47d9f758611 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -734,10 +734,6 @@ camera: &i2c7 { }; /* PINCTRL OVERRIDES */ -&ec_ap_int_l { - rockchip,pins = <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; -}; - &ap_fw_wp { rockchip,pins = <0 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index 2a332763c35c..9d9297bc5f04 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -123,7 +123,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts index 452728b82e42..3bf8f959e42c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts @@ -39,7 +39,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts index 72182c58cc46..65cb21837b0c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts @@ -19,7 +19,7 @@ keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi index 278123b4f911..b6e082f1f6d9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi @@ -167,6 +167,7 @@ }; &emmc_phy { + rockchip,enable-strobe-pulldown; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts index 9e2e246e0bab..dba4d03bfc2b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts @@ -52,13 +52,13 @@ press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = <KEY_BACK>; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 04c752f49be9..115c14c0a3c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -207,7 +207,7 @@ cap-sd-highspeed; cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>; disable-wp; - max-frequency = <150000000>; + max-frequency = <40000000>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; vmmc-supply = <&vcc3v3_baseboard>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts index 5a2661ae0131..7ba1c28f70a9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -98,13 +98,12 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; interrupt-parent = <&gpio0>; interrupts = <RK_PA5 IRQ_TYPE_EDGE_FALLING>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi index 2f4b1b2e3ac7..bbf1e3f24585 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi @@ -41,7 +41,7 @@ keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 645ced6617a6..1f76d3501bda 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -509,7 +509,6 @@ &i2s1 { rockchip,playback-channels = <2>; rockchip,capture-channels = <2>; - status = "okay"; }; &i2s2 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index 13927e7d0724..dbec2b7173a0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -33,13 +33,13 @@ press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = <KEY_BACK>; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi index 935b8c68a71d..bf9eb0405b62 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi @@ -297,11 +297,10 @@ clock-frequency = <400000>; status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index 0d45868132b9..8d61f824c12d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -23,7 +23,7 @@ io-channel-names = "buttons"; keyup-threshold-microvolt = <1750000>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index a05460b92415..25a8c781f4e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -740,7 +740,7 @@ &uart1 { pinctrl-names = "default"; - pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn>; + pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn &uart1m0_rtsn>; status = "okay"; uart-has-rtscts; @@ -748,13 +748,14 @@ compatible = "brcm,bcm43438-bt"; clocks = <&rk817 1>; clock-names = "lpo"; - device-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; - host-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; + device-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio2 RK_PB7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_l &bt_wake_l &bt_enable_h>; vbat-supply = <&vcc_sys>; vddio-supply = <&vcca1v8_pmu>; + max-speed = <3000000>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 77b179cd20e7..b276eb0810c7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -246,7 +246,7 @@ compatible = "rockchip,rk809"; reg = <0x20>; interrupt-parent = <&gpio0>; - interrupts = <RK_PA7 IRQ_TYPE_LEVEL_LOW>; + interrupts = <RK_PA3 IRQ_TYPE_LEVEL_LOW>; assigned-clocks = <&cru I2S1_MCLKOUT_TX>; assigned-clock-parents = <&cru CLK_I2S1_8CH_TX>; clock-names = "mclk"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts index dba648c2f57e..9fd262334d77 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts @@ -142,7 +142,7 @@ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>; assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>; clock_in_out = "input"; - phy-mode = "rgmii-id"; + phy-mode = "rgmii"; phy-supply = <&vcc_3v3>; pinctrl-names = "default"; pinctrl-0 = <&gmac1m0_miim @@ -432,11 +432,7 @@ &i2c3 { pinctrl-names = "default"; - pinctrl-0 = <&i2c3m1_xfer>; - status = "okay"; -}; - -&i2c5 { + pinctrl-0 = <&i2c3m0_xfer>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index c282f6e79960..26d7fda275ed 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -500,7 +500,6 @@ interrupt-parent = <&gpio0>; interrupts = <RK_PD3 IRQ_TYPE_EDGE_FALLING>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index fb87a168fe96..539ef8cc7792 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -509,7 +509,6 @@ interrupt-parent = <&gpio0>; interrupts = <RK_PD3 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 4b1ad810436f..4e8b66c74ea2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,7 +41,7 @@ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) #define MIDR_CPU_MODEL(imp, partnum) \ - (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d6cf535d8352..439e2bc5d5d8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,16 +14,8 @@ #ifdef CONFIG_EFI extern void efi_init(void); - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() - -static inline -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - return false; -} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0dd3cd38f54d..c36d56dbf940 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -862,12 +862,12 @@ static inline bool pte_user_accessible_page(pte_t pte) static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_present(pud) && pud_user(pud); + return pud_leaf(pud) && pud_user(pud); } #endif diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index b383b4802a7b..d30217c21eff 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -8,7 +8,7 @@ #ifndef __ASM_SYSCALL_WRAPPER_H #define __ASM_SYSCALL_WRAPPER_H -struct pt_regs; +#include <asm/ptrace.h> #define SC_ARM64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 67babd5f04c2..75691a2641c1 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include <linux/linkage.h> SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-112]! + stp x29, x30, [sp, #-32]! mov x29, sp /* @@ -17,20 +17,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) stp x1, x18, [sp, #16] /* - * Preserve all callee saved registers and record the stack pointer - * value in a per-CPU variable so we can recover from synchronous - * exceptions occurring while running the firmware routines. - */ - stp x19, x20, [sp, #32] - stp x21, x22, [sp, #48] - stp x23, x24, [sp, #64] - stp x25, x26, [sp, #80] - stp x27, x28, [sp, #96] - - adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - str x29, [x8] - - /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the * stack. @@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #112 + ldp x29, x30, [sp], #32 b.ne 0f ret 0: @@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) - -SYM_FUNC_START(__efi_rt_asm_recover) - ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - mov sp, x8 - - ldp x0, x18, [sp, #16] - ldp x19, x20, [sp, #32] - ldp x21, x22, [sp, #48] - ldp x23, x24, [sp, #64] - ldp x25, x26, [sp, #80] - ldp x27, x28, [sp, #96] - ldp x29, x30, [sp], #112 - - b efi_handle_runtime_exception -SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 8d36e66a6e64..a908a37f0367 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,10 +9,17 @@ #include <linux/efi.h> #include <linux/init.h> -#include <linux/percpu.h> #include <asm/efi.h> +static bool region_is_misaligned(const efi_memory_desc_t *md) +{ + if (PAGE_SIZE == EFI_PAGE_SIZE) + return false; + return !PAGE_ALIGNED(md->phys_addr) || + !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT); +} + /* * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be * executable, everything else can be mapped with the XN bits @@ -26,14 +33,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) if (type == EFI_MEMORY_MAPPED_IO) return PROT_DEVICE_nGnRE; - if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr), - "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?")) + if (region_is_misaligned(md)) { + static bool __initdata code_is_misaligned; + /* - * If the region is not aligned to the page size of the OS, we - * can not use strict permissions, since that would also affect - * the mapping attributes of the adjacent regions. + * Regions that are not aligned to the OS page size cannot be + * mapped with strict permissions, as those might interfere + * with the permissions that are needed by the adjacent + * region's mapping. However, if we haven't encountered any + * misaligned runtime code regions so far, we can safely use + * non-executable permissions for non-code regions. */ - return pgprot_val(PAGE_KERNEL_EXEC); + code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE); + + return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC) + : pgprot_val(PAGE_KERNEL); + } /* R-- */ if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) == @@ -64,19 +79,16 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE || md->type == EFI_RUNTIME_SERVICES_DATA); - if (!PAGE_ALIGNED(md->phys_addr) || - !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { - /* - * If the end address of this region is not aligned to page - * size, the mapping is rounded up, and may end up sharing a - * page frame with the next UEFI memory region. If we create - * a block entry now, we may need to split it again when mapping - * the next region, and support for that is going to be removed - * from the MMU routines. So avoid block mappings altogether in - * that case. - */ + /* + * If this region is not aligned to the page size used by the OS, the + * mapping will be rounded outwards, and may end up sharing a page + * frame with an adjacent runtime memory region. Given that the page + * table descriptor covering the shared page will be rewritten when the + * adjacent region gets mapped, we must avoid block mappings here so we + * don't have to worry about splitting them when that happens. + */ + if (region_is_misaligned(md)) page_mappings_only = true; - } create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, @@ -103,6 +115,9 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm, BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && md->type != EFI_RUNTIME_SERVICES_DATA); + if (region_is_misaligned(md)) + return 0; + /* * Calling apply_to_page_range() is only safe on regions that are * guaranteed to be mapped down to pages. Since we are only called @@ -129,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } - -asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); - -asmlinkage efi_status_t __efi_rt_asm_recover(void); - -asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) -{ - pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return EFI_ABORTED; -} - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - /* Check whether the exception occurred while running the firmware */ - if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) - return false; - - pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - dump_stack(); - - regs->pc = (u64)__efi_rt_asm_recover; - return true; -} diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 30cc2a9d1757..3b625f76ffba 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -235,11 +235,11 @@ SYM_TYPED_FUNC_START(ftrace_stub) ret SYM_FUNC_END(ftrace_stub) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_TYPED_FUNC_START(ftrace_stub_graph) ret SYM_FUNC_END(ftrace_stub_graph) -#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * void return_to_handler(void) * diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3cb101e8cb29..5240f6acad64 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -36,7 +36,22 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { unsigned long start = (unsigned long)page_address(page); - dcache_clean_poc(start, start + size); + /* + * The architecture only requires a clean to the PoC here in order to + * meet the requirements of the DMA API. However, some vendors (i.e. + * Qualcomm) abuse the DMA API for transferring buffers from the + * non-secure to the secure world, resetting the system if a non-secure + * access shows up after the buffer has been transferred: + * + * https://lore.kernel.org/r/20221114110329.68413-1-manivannan.sadhasivam@linaro.org + * + * Using clean+invalidate appears to make this issue less likely, but + * the drivers themselves still need fixing as the CPU could issue a + * speculative read from the buffer via the linear mapping irrespective + * of the cache maintenance we use. Once the drivers are fixed, we can + * relax this to a clean operation. + */ + dcache_clean_inval_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3eb2825d08cf..74f76514a48d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,7 +30,6 @@ #include <asm/bug.h> #include <asm/cmpxchg.h> #include <asm/cpufeature.h> -#include <asm/efi.h> #include <asm/exception.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> @@ -398,9 +397,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } - if (efi_runtime_fixup_exception(regs, msg)) - return; - die_kernel_fault(msg, addr, esr, regs); } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index d107c3d434e2..5922178d7a06 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -26,7 +26,7 @@ bool can_set_direct_map(void) * mapped at page granularity, so that it is possible to * protect/unprotect single pages. */ - return rodata_full || debug_pagealloc_enabled() || + return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || IS_ENABLED(CONFIG_KFENCE); } @@ -102,7 +102,8 @@ static int change_memory_common(unsigned long addr, int numpages, * If we are manipulating read-only permissions, apply the same * change to the linear mapping of the pages that back this VM area. */ - if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || + if (rodata_enabled && + rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { for (i = 0; i < area->nr_pages; i++) { __change_memory_common((u64)page_address(area->pages[i]), diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 903096bd87f8..386adde2feff 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -10,6 +10,7 @@ config LOONGARCH select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_INLINE_READ_LOCK if !PREEMPTION diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index f4cb54d5afd6..01b57b726322 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -97,7 +97,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_LOONGARCH CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ + grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index d06d4542b634..5332b1433f38 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -117,7 +117,7 @@ extern struct fwnode_handle *liointc_handle; extern struct fwnode_handle *pch_lpc_handle; extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -extern irqreturn_t loongson3_ipi_interrupt(int irq, void *dev); +extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev); #include <asm-generic/irq.h> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 946704bee599..79d5bfd913e0 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -349,13 +349,17 @@ static inline pte_t pte_mkclean(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pte_val(pte) |= _PAGE_MODIFIED; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY); + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_MODIFIED) + pte_val(pte) |= _PAGE_DIRTY; return pte; } @@ -455,7 +459,9 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY); + pmd_val(pmd) |= _PAGE_WRITE; + if (pmd_val(pmd) & _PAGE_MODIFIED) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } @@ -478,10 +484,13 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pmd_val(pmd) |= _PAGE_MODIFIED; + if (pmd_val(pmd) & _PAGE_WRITE) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 71189b28bfb2..d82687390b4a 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -19,21 +19,21 @@ extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_foreign_map[]; -void loongson3_smp_setup(void); -void loongson3_prepare_cpus(unsigned int max_cpus); -void loongson3_boot_secondary(int cpu, struct task_struct *idle); -void loongson3_init_secondary(void); -void loongson3_smp_finish(void); -void loongson3_send_ipi_single(int cpu, unsigned int action); -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action); +void loongson_smp_setup(void); +void loongson_prepare_cpus(unsigned int max_cpus); +void loongson_boot_secondary(int cpu, struct task_struct *idle); +void loongson_init_secondary(void); +void loongson_smp_finish(void); +void loongson_send_ipi_single(int cpu, unsigned int action); +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action); #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void); -void loongson3_cpu_die(unsigned int cpu); +int loongson_cpu_disable(void); +void loongson_cpu_die(unsigned int cpu); #endif static inline void plat_smp_setup(void) { - loongson3_smp_setup(); + loongson_smp_setup(); } static inline int raw_smp_processor_id(void) @@ -78,35 +78,25 @@ extern void calculate_cpu_foreign_map(void); */ extern void show_ipi_list(struct seq_file *p, int prec); -/* - * This function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static inline void smp_send_reschedule(int cpu) -{ - loongson3_send_ipi_single(cpu, SMP_RESCHEDULE); -} - static inline void arch_send_call_function_single_ipi(int cpu) { - loongson3_send_ipi_single(cpu, SMP_CALL_FUNCTION); + loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - loongson3_send_ipi_mask(mask, SMP_CALL_FUNCTION); + loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION); } #ifdef CONFIG_HOTPLUG_CPU static inline int __cpu_disable(void) { - return loongson3_cpu_disable(); + return loongson_cpu_disable(); } static inline void __cpu_die(unsigned int cpu) { - loongson3_cpu_die(cpu); + loongson_cpu_die(cpu); } extern void play_dead(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 335398482038..8319cc409009 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -56,23 +56,6 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) return ioremap_cache(phys, size); } -void __init acpi_boot_table_init(void) -{ - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return; - - /* - * Initialize the ACPI boot-time table parser. - */ - if (acpi_table_init()) { - disable_acpi(); - return; - } -} - #ifdef CONFIG_SMP static int set_processor_mask(u32 id, u32 flags) { @@ -156,13 +139,21 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } -int __init acpi_boot_init(void) +void __init acpi_boot_table_init(void) { /* * If acpi_disabled, bail out */ if (acpi_disabled) - return -1; + return; + + /* + * Initialize the ACPI boot-time table parser. + */ + if (acpi_table_init()) { + disable_acpi(); + return; + } loongson_sysconf.boot_cpu_id = read_csr_cpuid(); @@ -173,8 +164,6 @@ int __init acpi_boot_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); - - return 0; } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 1ba19c76563e..0524bf1169b7 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -117,7 +117,7 @@ void __init init_IRQ(void) if (ipi_irq < 0) panic("IPI IRQ mapping failed\n"); irq_set_percpu_devid(ipi_irq); - r = request_percpu_irq(ipi_irq, loongson3_ipi_interrupt, "IPI", &ipi_dummy_dev); + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev); if (r < 0) panic("IPI IRQ request failed\n"); #endif diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2526b68f1c0f..ddb8ba4eb399 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -152,7 +152,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->csr_crmd = p->thread.csr_crmd; childregs->csr_prmd = p->thread.csr_prmd; childregs->csr_ecfg = p->thread.csr_ecfg; - return 0; + goto out; } /* user thread */ @@ -171,14 +171,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ childregs->csr_euen = 0; + if (clone_flags & CLONE_SETTLS) + childregs->regs[2] = tls; + +out: clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); - if (clone_flags & CLONE_SETTLS) - childregs->regs[2] = tls; - return 0; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 1eb63fa9bc81..ae436def7ee9 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -257,7 +257,6 @@ void __init platform_init(void) #ifdef CONFIG_ACPI acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); - acpi_boot_init(); #endif #ifdef CONFIG_NUMA diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 781a4d4bdddc..14508d429ffa 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -136,12 +136,12 @@ static void ipi_write_action(int cpu, u32 action) } } -void loongson3_send_ipi_single(int cpu, unsigned int action) +void loongson_send_ipi_single(int cpu, unsigned int action) { ipi_write_action(cpu_logical_map(cpu), (u32)action); } -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; @@ -149,7 +149,18 @@ void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } -irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) +/* + * This function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +void smp_send_reschedule(int cpu) +{ + loongson_send_ipi_single(cpu, SMP_RESCHEDULE); +} +EXPORT_SYMBOL_GPL(smp_send_reschedule); + +irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; unsigned int cpu = smp_processor_id(); @@ -169,7 +180,7 @@ irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } -void __init loongson3_smp_setup(void) +void __init loongson_smp_setup(void) { cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; @@ -178,7 +189,7 @@ void __init loongson3_smp_setup(void) pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); } -void __init loongson3_prepare_cpus(unsigned int max_cpus) +void __init loongson_prepare_cpus(unsigned int max_cpus) { int i = 0; @@ -193,7 +204,7 @@ void __init loongson3_prepare_cpus(unsigned int max_cpus) /* * Setup the PC, SP, and TP of a secondary processor and start it running! */ -void loongson3_boot_secondary(int cpu, struct task_struct *idle) +void loongson_boot_secondary(int cpu, struct task_struct *idle) { unsigned long entry; @@ -205,13 +216,13 @@ void loongson3_boot_secondary(int cpu, struct task_struct *idle) csr_mail_send(entry, cpu_logical_map(cpu), 0); - loongson3_send_ipi_single(cpu, SMP_BOOT_CPU); + loongson_send_ipi_single(cpu, SMP_BOOT_CPU); } /* * SMP init and finish on secondary CPUs */ -void loongson3_init_secondary(void) +void loongson_init_secondary(void) { unsigned int cpu = smp_processor_id(); unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | @@ -231,7 +242,7 @@ void loongson3_init_secondary(void) cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; } -void loongson3_smp_finish(void) +void loongson_smp_finish(void) { local_irq_enable(); iocsr_write64(0, LOONGARCH_IOCSR_MBUF0); @@ -240,7 +251,7 @@ void loongson3_smp_finish(void) #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void) +int loongson_cpu_disable(void) { unsigned long flags; unsigned int cpu = smp_processor_id(); @@ -262,7 +273,7 @@ int loongson3_cpu_disable(void) return 0; } -void loongson3_cpu_die(unsigned int cpu) +void loongson_cpu_die(unsigned int cpu) { while (per_cpu(cpu_state, cpu) != CPU_DEAD) cpu_relax(); @@ -300,19 +311,19 @@ void play_dead(void) */ #ifdef CONFIG_PM -static int loongson3_ipi_suspend(void) +static int loongson_ipi_suspend(void) { return 0; } -static void loongson3_ipi_resume(void) +static void loongson_ipi_resume(void) { iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); } -static struct syscore_ops loongson3_ipi_syscore_ops = { - .resume = loongson3_ipi_resume, - .suspend = loongson3_ipi_suspend, +static struct syscore_ops loongson_ipi_syscore_ops = { + .resume = loongson_ipi_resume, + .suspend = loongson_ipi_suspend, }; /* @@ -321,7 +332,7 @@ static struct syscore_ops loongson3_ipi_syscore_ops = { */ static int __init ipi_pm_init(void) { - register_syscore_ops(&loongson3_ipi_syscore_ops); + register_syscore_ops(&loongson_ipi_syscore_ops); return 0; } @@ -425,7 +436,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { init_new_context(current, &init_mm); current_thread_info()->cpu = 0; - loongson3_prepare_cpus(max_cpus); + loongson_prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); calculate_cpu_foreign_map(); @@ -436,7 +447,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - loongson3_boot_secondary(cpu, tidle); + loongson_boot_secondary(cpu, tidle); /* Wait for CPU to start and be ready to sync counters */ if (!wait_for_completion_timeout(&cpu_starting, @@ -465,7 +476,7 @@ asmlinkage void start_secondary(void) cpu_probe(); constant_clockevent_init(); - loongson3_init_secondary(); + loongson_init_secondary(); set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); @@ -487,11 +498,11 @@ asmlinkage void start_secondary(void) complete(&cpu_running); /* - * irq will be enabled in loongson3_smp_finish(), enabling it too + * irq will be enabled in loongson_smp_finish(), enabling it too * early is dangerous. */ WARN_ON_ONCE(!irqs_disabled()); - loongson3_smp_finish(); + loongson_smp_finish(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index b206d9159205..4571c3c87cd4 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -43,7 +43,8 @@ static bool unwind_by_prologue(struct unwind_state *state) { struct stack_info *info = &state->stack_info; union loongarch_instruction *ip, *ip_end; - unsigned long frame_size = 0, frame_ra = -1; + long frame_ra = -1; + unsigned long frame_size = 0; unsigned long size, offset, pc = state->pc; if (state->sp >= info->end || state->sp < info->begin) diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index d8ee8fbc8c67..58781c6e4191 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -10,6 +10,8 @@ #include <asm/regdef.h> #include <asm/stackframe.h> +#define INVTLB_ADDR_GFALSE_AND_ASID 5 + #define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3) @@ -136,13 +138,10 @@ tlb_huge_update_load: ori t0, ra, _PAGE_VALID st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr - - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 /* * A huge PTE describes an area the size of the @@ -287,13 +286,11 @@ tlb_huge_update_store: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -436,6 +433,11 @@ tlb_huge_update_modify: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 + /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -466,7 +468,7 @@ tlb_huge_update_modify: addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX - tlbwr + tlbfill /* Reset default page size */ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c index 557d60867f98..6fdc13610565 100644 --- a/arch/m68k/emu/nfcon.c +++ b/arch/m68k/emu/nfcon.c @@ -49,7 +49,7 @@ static void nfcon_write(struct console *con, const char *str, static struct tty_driver *nfcon_device(struct console *con, int *index) { *index = 0; - return (con->flags & CON_ENABLED) ? nfcon_tty_driver : NULL; + return console_is_registered(con) ? nfcon_tty_driver : NULL; } static struct console nf_console = { @@ -107,6 +107,11 @@ static int __init nf_debug_setup(char *arg) stderr_id = nf_get_id("NF_STDERR"); if (stderr_id) { + /* + * The console will be enabled when debug=nfcon is specified + * as a kernel parameter. Since this is a non-standard way + * of enabling consoles, it must be explicitly enabled. + */ nf_console.flags |= CON_ENABLED; register_console(&nf_console); } @@ -151,7 +156,7 @@ static int __init nfcon_init(void) nfcon_tty_driver = driver; - if (!(nf_console.flags & CON_ENABLED)) + if (!console_is_registered(&nf_console)) register_console(&nf_console); return 0; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 4ebb56d6d959..cc88af6fa7a4 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -215,11 +215,3 @@ config MB_MANAGER Say N here unless you know what you are doing. endmenu - -menu "Bus Options" - -config PCI_XILINX - bool "Xilinx PCI host bridge support" - depends on PCI - -endmenu diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 3f8a86c4336a..02e6be9c5b0d 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -67,12 +67,12 @@ linux.bin.ub linux.bin.gz: linux.bin linux.bin: vmlinux linux.bin linux.bin.gz linux.bin.ub: $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' PHONY += simpleImage.$(DTB) simpleImage.$(DTB): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(addprefix $(boot)/$@., ub unstrip strip) - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' define archhelp echo '* linux.bin - Create raw binary' diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 8150daf04a76..756feb9369a6 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -19,7 +19,6 @@ CONFIG_HZ_100=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE_FORCE=y CONFIG_HIGHMEM=y -CONFIG_PCI_XILINX=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/microblaze/include/asm/pci-bridge.h b/arch/microblaze/include/asm/pci-bridge.h index 171b40a2d905..be5f504bead4 100644 --- a/arch/microblaze/include/asm/pci-bridge.h +++ b/arch/microblaze/include/asm/pci-bridge.h @@ -25,75 +25,17 @@ static inline int pcibios_vaddr_is_ioport(void __iomem *address) */ struct pci_controller { struct pci_bus *bus; - char is_dynamic; - struct device_node *dn; struct list_head list_node; - struct device *parent; - - int first_busno; - int last_busno; - - int self_busno; void __iomem *io_base_virt; - resource_size_t io_base_phys; - - resource_size_t pci_io_size; - - /* Some machines (PReP) have a non 1:1 mapping of - * the PCI memory space in the CPU bus space - */ - resource_size_t pci_mem_offset; - - /* Some machines have a special region to forward the ISA - * "memory" cycles such as VGA memory regions. Left to 0 - * if unsupported - */ - resource_size_t isa_mem_phys; - resource_size_t isa_mem_size; - - struct pci_ops *ops; - unsigned int __iomem *cfg_addr; - void __iomem *cfg_data; - - /* - * Used for variants of PCI indirect handling and possible quirks: - * SET_CFG_TYPE - used on 4xx or any PHB that does explicit type0/1 - * EXT_REG - provides access to PCI-e extended registers - * SURPRESS_PRIMARY_BUS - we suppress the setting of PCI_PRIMARY_BUS - * on Freescale PCI-e controllers since they used the PCI_PRIMARY_BUS - * to determine which bus number to match on when generating type0 - * config cycles - * NO_PCIE_LINK - the Freescale PCI-e controllers have issues with - * hanging if we don't have link and try to do config cycles to - * anything but the PHB. Only allow talking to the PHB if this is - * set. - * BIG_ENDIAN - cfg_addr is a big endian register - * BROKEN_MRM - the 440EPx/GRx chips have an errata that causes hangs - * on the PLB4. Effectively disable MRM commands by setting this. - */ -#define INDIRECT_TYPE_SET_CFG_TYPE 0x00000001 -#define INDIRECT_TYPE_EXT_REG 0x00000002 -#define INDIRECT_TYPE_SURPRESS_PRIMARY_BUS 0x00000004 -#define INDIRECT_TYPE_NO_PCIE_LINK 0x00000008 -#define INDIRECT_TYPE_BIG_ENDIAN 0x00000010 -#define INDIRECT_TYPE_BROKEN_MRM 0x00000020 - u32 indirect_type; /* Currently, we limit ourselves to 1 IO range and 3 mem * ranges since the common pci_bus structure can't handle more */ struct resource io_resource; - struct resource mem_resources[3]; - int global_number; /* PCI domain number */ }; #ifdef CONFIG_PCI -static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus) -{ - return bus->sysdata; -} - static inline int isa_vaddr_is_ioport(void __iomem *address) { /* No specific ISA handling on ppc32 at this stage, it @@ -103,39 +45,5 @@ static inline int isa_vaddr_is_ioport(void __iomem *address) } #endif /* CONFIG_PCI */ -/* These are used for config access before all the PCI probing - has been done. */ -extern int early_read_config_byte(struct pci_controller *hose, int bus, - int dev_fn, int where, u8 *val); -extern int early_read_config_word(struct pci_controller *hose, int bus, - int dev_fn, int where, u16 *val); -extern int early_read_config_dword(struct pci_controller *hose, int bus, - int dev_fn, int where, u32 *val); -extern int early_write_config_byte(struct pci_controller *hose, int bus, - int dev_fn, int where, u8 val); -extern int early_write_config_word(struct pci_controller *hose, int bus, - int dev_fn, int where, u16 val); -extern int early_write_config_dword(struct pci_controller *hose, int bus, - int dev_fn, int where, u32 val); - -extern int early_find_capability(struct pci_controller *hose, int bus, - int dev_fn, int cap); - -extern void setup_indirect_pci(struct pci_controller *hose, - resource_size_t cfg_addr, - resource_size_t cfg_data, u32 flags); - -/* Get the PCI host controller for an OF device */ -extern struct pci_controller *pci_find_hose_for_OF_device( - struct device_node *node); - -/* Fill up host controller resources from the OF node */ -extern void pci_process_bridge_OF_ranges(struct pci_controller *hose, - struct device_node *dev, int primary); - -/* Allocate & free a PCI host bridge structure */ -extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev); -extern void pcibios_free_controller(struct pci_controller *phb); - #endif /* __KERNEL__ */ #endif /* _ASM_MICROBLAZE_PCI_BRIDGE_H */ diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index d90528064604..91f1f71c266a 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -21,15 +21,6 @@ #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 -/* Values for the `which' argument to sys_pciconfig_iobase syscall. */ -#define IOBASE_BRIDGE_NUMBER 0 -#define IOBASE_MEMORY 1 -#define IOBASE_IO 2 -#define IOBASE_ISA_IO 3 -#define IOBASE_ISA_MEM 4 - -#define pcibios_scan_all_fns(a, b) 0 - /* * Set this to 1 if you want the kernel to re-assign all PCI * bus numbers (don't do that on ppc64 yet !) @@ -41,33 +32,13 @@ extern int pci_domain_nr(struct pci_bus *bus); /* Decide whether to display the domain number in /proc */ extern int pci_proc_domain(struct pci_bus *bus); -struct vm_area_struct; - /* Tell PCI code what kind of PCI resource mappings we support */ #define HAVE_PCI_MMAP 1 #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 -#define arch_can_pci_mmap_io() 1 - -extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, - size_t count); -extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, - size_t count); -extern int pci_mmap_legacy_page_range(struct pci_bus *bus, - struct vm_area_struct *vma, - enum pci_mmap_state mmap_state); - -#define HAVE_PCI_LEGACY 1 - -extern void pcibios_resource_survey(void); struct file; -/* This part of code was originally in xilinx-pci.h */ -#ifdef CONFIG_PCI_XILINX -extern void __init xilinx_pci_init(void); -#else static inline void __init xilinx_pci_init(void) { return; } -#endif #endif /* __KERNEL__ */ #endif /* __ASM_MICROBLAZE_PCI_H */ diff --git a/arch/microblaze/pci/Makefile b/arch/microblaze/pci/Makefile index 0251c20e1d62..f8267d20e067 100644 --- a/arch/microblaze/pci/Makefile +++ b/arch/microblaze/pci/Makefile @@ -3,5 +3,4 @@ # Makefile # -obj-$(CONFIG_PCI) += pci-common.o indirect_pci.o iomap.o -obj-$(CONFIG_PCI_XILINX) += xilinx_pci.o +obj-$(CONFIG_PCI) += iomap.o diff --git a/arch/microblaze/pci/indirect_pci.c b/arch/microblaze/pci/indirect_pci.c deleted file mode 100644 index 1caf7d3e0eef..000000000000 --- a/arch/microblaze/pci/indirect_pci.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Support for indirect PCI bridges. - * - * Copyright (C) 1998 Gabriel Paubert. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/init.h> - -#include <linux/io.h> -#include <asm/pci-bridge.h> - -static int -indirect_read_config(struct pci_bus *bus, unsigned int devfn, int offset, - int len, u32 *val) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - volatile void __iomem *cfg_data; - u8 cfg_type = 0; - u32 bus_no, reg; - - if (hose->indirect_type & INDIRECT_TYPE_NO_PCIE_LINK) { - if (bus->number != hose->first_busno) - return PCIBIOS_DEVICE_NOT_FOUND; - if (devfn != 0) - return PCIBIOS_DEVICE_NOT_FOUND; - } - - if (hose->indirect_type & INDIRECT_TYPE_SET_CFG_TYPE) - if (bus->number != hose->first_busno) - cfg_type = 1; - - bus_no = (bus->number == hose->first_busno) ? - hose->self_busno : bus->number; - - if (hose->indirect_type & INDIRECT_TYPE_EXT_REG) - reg = ((offset & 0xf00) << 16) | (offset & 0xfc); - else - reg = offset & 0xfc; /* Only 3 bits for function */ - - if (hose->indirect_type & INDIRECT_TYPE_BIG_ENDIAN) - out_be32(hose->cfg_addr, (0x80000000 | (bus_no << 16) | - (devfn << 8) | reg | cfg_type)); - else - out_le32(hose->cfg_addr, (0x80000000 | (bus_no << 16) | - (devfn << 8) | reg | cfg_type)); - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - cfg_data = hose->cfg_data + (offset & 3); /* Only 3 bits for function */ - switch (len) { - case 1: - *val = in_8(cfg_data); - break; - case 2: - *val = in_le16(cfg_data); - break; - default: - *val = in_le32(cfg_data); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int -indirect_write_config(struct pci_bus *bus, unsigned int devfn, int offset, - int len, u32 val) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - volatile void __iomem *cfg_data; - u8 cfg_type = 0; - u32 bus_no, reg; - - if (hose->indirect_type & INDIRECT_TYPE_NO_PCIE_LINK) { - if (bus->number != hose->first_busno) - return PCIBIOS_DEVICE_NOT_FOUND; - if (devfn != 0) - return PCIBIOS_DEVICE_NOT_FOUND; - } - - if (hose->indirect_type & INDIRECT_TYPE_SET_CFG_TYPE) - if (bus->number != hose->first_busno) - cfg_type = 1; - - bus_no = (bus->number == hose->first_busno) ? - hose->self_busno : bus->number; - - if (hose->indirect_type & INDIRECT_TYPE_EXT_REG) - reg = ((offset & 0xf00) << 16) | (offset & 0xfc); - else - reg = offset & 0xfc; - - if (hose->indirect_type & INDIRECT_TYPE_BIG_ENDIAN) - out_be32(hose->cfg_addr, (0x80000000 | (bus_no << 16) | - (devfn << 8) | reg | cfg_type)); - else - out_le32(hose->cfg_addr, (0x80000000 | (bus_no << 16) | - (devfn << 8) | reg | cfg_type)); - - /* suppress setting of PCI_PRIMARY_BUS */ - if (hose->indirect_type & INDIRECT_TYPE_SURPRESS_PRIMARY_BUS) - if ((offset == PCI_PRIMARY_BUS) && - (bus->number == hose->first_busno)) - val &= 0xffffff00; - - /* Workaround for PCI_28 Errata in 440EPx/GRx */ - if ((hose->indirect_type & INDIRECT_TYPE_BROKEN_MRM) && - offset == PCI_CACHE_LINE_SIZE) { - val = 0; - } - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - cfg_data = hose->cfg_data + (offset & 3); - switch (len) { - case 1: - out_8(cfg_data, val); - break; - case 2: - out_le16(cfg_data, val); - break; - default: - out_le32(cfg_data, val); - break; - } - - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops indirect_pci_ops = { - .read = indirect_read_config, - .write = indirect_write_config, -}; - -void __init -setup_indirect_pci(struct pci_controller *hose, - resource_size_t cfg_addr, - resource_size_t cfg_data, u32 flags) -{ - resource_size_t base = cfg_addr & PAGE_MASK; - void __iomem *mbase; - - mbase = ioremap(base, PAGE_SIZE); - hose->cfg_addr = mbase + (cfg_addr & ~PAGE_MASK); - if ((cfg_data & PAGE_MASK) != base) - mbase = ioremap(cfg_data & PAGE_MASK, PAGE_SIZE); - hose->cfg_data = mbase + (cfg_data & ~PAGE_MASK); - hose->ops = &indirect_pci_ops; - hose->indirect_type = flags; -} diff --git a/arch/microblaze/pci/iomap.c b/arch/microblaze/pci/iomap.c index bde74af4c1cd..b2ee8ace9b6e 100644 --- a/arch/microblaze/pci/iomap.c +++ b/arch/microblaze/pci/iomap.c @@ -11,6 +11,42 @@ #include <linux/io.h> #include <asm/pci-bridge.h> +static DEFINE_SPINLOCK(hose_spinlock); +LIST_HEAD(hose_list); + +unsigned long isa_io_base; +EXPORT_SYMBOL(isa_io_base); + +static resource_size_t pcibios_io_size(const struct pci_controller *hose) +{ + return resource_size(&hose->io_resource); +} + +int pcibios_vaddr_is_ioport(void __iomem *address) +{ + int ret = 0; + struct pci_controller *hose; + resource_size_t size; + + spin_lock(&hose_spinlock); + list_for_each_entry(hose, &hose_list, list_node) { + size = pcibios_io_size(hose); + if (address >= hose->io_base_virt && + address < (hose->io_base_virt + size)) { + ret = 1; + break; + } + } + spin_unlock(&hose_spinlock); + return ret; +} + +/* Display the domain number in /proc */ +int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} + void pci_iounmap(struct pci_dev *dev, void __iomem *addr) { if (isa_vaddr_is_ioport(addr)) diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c deleted file mode 100644 index 33bab7eec731..000000000000 --- a/arch/microblaze/pci/pci-common.c +++ /dev/null @@ -1,1067 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Contains common pci routines for ALL ppc platform - * (based on pci_32.c and pci_64.c) - * - * Port for PPC64 David Engebretsen, IBM Corp. - * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. - * - * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM - * Rework, based on alpha PCI code. - * - * Common pmac/prep/chrp pci routines. -- Cort - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/memblock.h> -#include <linux/mm.h> -#include <linux/shmem_fs.h> -#include <linux/list.h> -#include <linux/syscalls.h> -#include <linux/irq.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/of_pci.h> -#include <linux/export.h> - -#include <asm/processor.h> -#include <linux/io.h> -#include <asm/pci-bridge.h> -#include <asm/byteorder.h> - -static DEFINE_SPINLOCK(hose_spinlock); -LIST_HEAD(hose_list); - -/* XXX kill that some day ... */ -static int global_phb_number; /* Global phb counter */ - -/* ISA Memory physical address */ -resource_size_t isa_mem_base; - -unsigned long isa_io_base; -EXPORT_SYMBOL(isa_io_base); - -static int pci_bus_count; - -struct pci_controller *pcibios_alloc_controller(struct device_node *dev) -{ - struct pci_controller *phb; - - phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL); - if (!phb) - return NULL; - spin_lock(&hose_spinlock); - phb->global_number = global_phb_number++; - list_add_tail(&phb->list_node, &hose_list); - spin_unlock(&hose_spinlock); - phb->dn = dev; - phb->is_dynamic = mem_init_done; - return phb; -} - -void pcibios_free_controller(struct pci_controller *phb) -{ - spin_lock(&hose_spinlock); - list_del(&phb->list_node); - spin_unlock(&hose_spinlock); - - if (phb->is_dynamic) - kfree(phb); -} - -static resource_size_t pcibios_io_size(const struct pci_controller *hose) -{ - return resource_size(&hose->io_resource); -} - -int pcibios_vaddr_is_ioport(void __iomem *address) -{ - int ret = 0; - struct pci_controller *hose; - resource_size_t size; - - spin_lock(&hose_spinlock); - list_for_each_entry(hose, &hose_list, list_node) { - size = pcibios_io_size(hose); - if (address >= hose->io_base_virt && - address < (hose->io_base_virt + size)) { - ret = 1; - break; - } - } - spin_unlock(&hose_spinlock); - return ret; -} - -unsigned long pci_address_to_pio(phys_addr_t address) -{ - struct pci_controller *hose; - resource_size_t size; - unsigned long ret = ~0; - - spin_lock(&hose_spinlock); - list_for_each_entry(hose, &hose_list, list_node) { - size = pcibios_io_size(hose); - if (address >= hose->io_base_phys && - address < (hose->io_base_phys + size)) { - unsigned long base = - (unsigned long)hose->io_base_virt - _IO_BASE; - ret = base + (address - hose->io_base_phys); - break; - } - } - spin_unlock(&hose_spinlock); - - return ret; -} -EXPORT_SYMBOL_GPL(pci_address_to_pio); - -/* This routine is meant to be used early during boot, when the - * PCI bus numbers have not yet been assigned, and you need to - * issue PCI config cycles to an OF device. - * It could also be used to "fix" RTAS config cycles if you want - * to set pci_assign_all_buses to 1 and still use RTAS for PCI - * config cycles. - */ -struct pci_controller *pci_find_hose_for_OF_device(struct device_node *node) -{ - while (node) { - struct pci_controller *hose, *tmp; - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - if (hose->dn == node) - return hose; - node = node->parent; - } - return NULL; -} - -void pcibios_set_master(struct pci_dev *dev) -{ - /* No special bus mastering setup handling */ -} - -/* - * Platform support for /proc/bus/pci/X/Y mmap()s. - */ - -int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - resource_size_t ioaddr = pci_resource_start(pdev, bar); - - if (!hose) - return -EINVAL; /* should never happen */ - - /* Convert to an offset within this PCI controller */ - ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE; - - vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT; - return 0; -} - -/* This provides legacy IO read access on a bus */ -int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) -{ - unsigned long offset; - struct pci_controller *hose = pci_bus_to_host(bus); - struct resource *rp = &hose->io_resource; - void __iomem *addr; - - /* Check if port can be supported by that bus. We only check - * the ranges of the PHB though, not the bus itself as the rules - * for forwarding legacy cycles down bridges are not our problem - * here. So if the host bridge supports it, we do it. - */ - offset = (unsigned long)hose->io_base_virt - _IO_BASE; - offset += port; - - if (!(rp->flags & IORESOURCE_IO)) - return -ENXIO; - if (offset < rp->start || (offset + size) > rp->end) - return -ENXIO; - addr = hose->io_base_virt + port; - - switch (size) { - case 1: - *((u8 *)val) = in_8(addr); - return 1; - case 2: - if (port & 1) - return -EINVAL; - *((u16 *)val) = in_le16(addr); - return 2; - case 4: - if (port & 3) - return -EINVAL; - *((u32 *)val) = in_le32(addr); - return 4; - } - return -EINVAL; -} - -/* This provides legacy IO write access on a bus */ -int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) -{ - unsigned long offset; - struct pci_controller *hose = pci_bus_to_host(bus); - struct resource *rp = &hose->io_resource; - void __iomem *addr; - - /* Check if port can be supported by that bus. We only check - * the ranges of the PHB though, not the bus itself as the rules - * for forwarding legacy cycles down bridges are not our problem - * here. So if the host bridge supports it, we do it. - */ - offset = (unsigned long)hose->io_base_virt - _IO_BASE; - offset += port; - - if (!(rp->flags & IORESOURCE_IO)) - return -ENXIO; - if (offset < rp->start || (offset + size) > rp->end) - return -ENXIO; - addr = hose->io_base_virt + port; - - /* WARNING: The generic code is idiotic. It gets passed a pointer - * to what can be a 1, 2 or 4 byte quantity and always reads that - * as a u32, which means that we have to correct the location of - * the data read within those 32 bits for size 1 and 2 - */ - switch (size) { - case 1: - out_8(addr, val >> 24); - return 1; - case 2: - if (port & 1) - return -EINVAL; - out_le16(addr, val >> 16); - return 2; - case 4: - if (port & 3) - return -EINVAL; - out_le32(addr, val); - return 4; - } - return -EINVAL; -} - -/* This provides legacy IO or memory mmap access on a bus */ -int pci_mmap_legacy_page_range(struct pci_bus *bus, - struct vm_area_struct *vma, - enum pci_mmap_state mmap_state) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - resource_size_t offset = - ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT; - resource_size_t size = vma->vm_end - vma->vm_start; - struct resource *rp; - - pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n", - pci_domain_nr(bus), bus->number, - mmap_state == pci_mmap_mem ? "MEM" : "IO", - (unsigned long long)offset, - (unsigned long long)(offset + size - 1)); - - if (mmap_state == pci_mmap_mem) { - /* Hack alert ! - * - * Because X is lame and can fail starting if it gets an error - * trying to mmap legacy_mem (instead of just moving on without - * legacy memory access) we fake it here by giving it anonymous - * memory, effectively behaving just like /dev/zero - */ - if ((offset + size) > hose->isa_mem_size) { - pr_debug("Process %s (pid:%d) mapped non-existing PCI", - current->comm, current->pid); - pr_debug("legacy memory for 0%04x:%02x\n", - pci_domain_nr(bus), bus->number); - if (vma->vm_flags & VM_SHARED) - return shmem_zero_setup(vma); - return 0; - } - offset += hose->isa_mem_phys; - } else { - unsigned long io_offset = (unsigned long)hose->io_base_virt - - _IO_BASE; - unsigned long roffset = offset + io_offset; - rp = &hose->io_resource; - if (!(rp->flags & IORESOURCE_IO)) - return -ENXIO; - if (roffset < rp->start || (roffset + size) > rp->end) - return -ENXIO; - offset += hose->io_base_phys; - } - pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset); - - vma->vm_pgoff = offset >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} - -void pci_resource_to_user(const struct pci_dev *dev, int bar, - const struct resource *rsrc, - resource_size_t *start, resource_size_t *end) -{ - struct pci_bus_region region; - - if (rsrc->flags & IORESOURCE_IO) { - pcibios_resource_to_bus(dev->bus, ®ion, - (struct resource *) rsrc); - *start = region.start; - *end = region.end; - return; - } - - /* We pass a CPU physical address to userland for MMIO instead of a - * BAR value because X is lame and expects to be able to use that - * to pass to /dev/mem! - * - * That means we may have 64-bit values where some apps only expect - * 32 (like X itself since it thinks only Sparc has 64-bit MMIO). - */ - *start = rsrc->start; - *end = rsrc->end; -} - -/** - * pci_process_bridge_OF_ranges - Parse PCI bridge resources from device tree - * @hose: newly allocated pci_controller to be setup - * @dev: device node of the host bridge - * @primary: set if primary bus (32 bits only, soon to be deprecated) - * - * This function will parse the "ranges" property of a PCI host bridge device - * node and setup the resource mapping of a pci controller based on its - * content. - * - * Life would be boring if it wasn't for a few issues that we have to deal - * with here: - * - * - We can only cope with one IO space range and up to 3 Memory space - * ranges. However, some machines (thanks Apple !) tend to split their - * space into lots of small contiguous ranges. So we have to coalesce. - * - * - We can only cope with all memory ranges having the same offset - * between CPU addresses and PCI addresses. Unfortunately, some bridges - * are setup for a large 1:1 mapping along with a small "window" which - * maps PCI address 0 to some arbitrary high address of the CPU space in - * order to give access to the ISA memory hole. - * The way out of here that I've chosen for now is to always set the - * offset based on the first resource found, then override it if we - * have a different offset and the previous was set by an ISA hole. - * - * - Some busses have IO space not starting at 0, which causes trouble with - * the way we do our IO resource renumbering. The code somewhat deals with - * it for 64 bits but I would expect problems on 32 bits. - * - * - Some 32 bits platforms such as 4xx can have physical space larger than - * 32 bits so we need to use 64 bits values for the parsing - */ -void pci_process_bridge_OF_ranges(struct pci_controller *hose, - struct device_node *dev, int primary) -{ - int memno = 0, isa_hole = -1; - unsigned long long isa_mb = 0; - struct resource *res; - struct of_pci_range range; - struct of_pci_range_parser parser; - - pr_info("PCI host bridge %pOF %s ranges:\n", - dev, primary ? "(primary)" : ""); - - /* Check for ranges property */ - if (of_pci_range_parser_init(&parser, dev)) - return; - - pr_debug("Parsing ranges property...\n"); - for_each_of_pci_range(&parser, &range) { - /* Read next ranges element */ - - /* If we failed translation or got a zero-sized region - * (some FW try to feed us with non sensical zero sized regions - * such as power3 which look like some kind of attempt - * at exposing the VGA memory hole) - */ - if (range.cpu_addr == OF_BAD_ADDR || range.size == 0) - continue; - - /* Act based on address space type */ - res = NULL; - switch (range.flags & IORESOURCE_TYPE_BITS) { - case IORESOURCE_IO: - pr_info(" IO 0x%016llx..0x%016llx -> 0x%016llx\n", - range.cpu_addr, range.cpu_addr + range.size - 1, - range.pci_addr); - - /* We support only one IO range */ - if (hose->pci_io_size) { - pr_info(" \\--> Skipped (too many) !\n"); - continue; - } - /* On 32 bits, limit I/O space to 16MB */ - if (range.size > 0x01000000) - range.size = 0x01000000; - - /* 32 bits needs to map IOs here */ - hose->io_base_virt = ioremap(range.cpu_addr, - range.size); - - /* Expect trouble if pci_addr is not 0 */ - if (primary) - isa_io_base = - (unsigned long)hose->io_base_virt; - /* pci_io_size and io_base_phys always represent IO - * space starting at 0 so we factor in pci_addr - */ - hose->pci_io_size = range.pci_addr + range.size; - hose->io_base_phys = range.cpu_addr - range.pci_addr; - - /* Build resource */ - res = &hose->io_resource; - range.cpu_addr = range.pci_addr; - - break; - case IORESOURCE_MEM: - pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n", - range.cpu_addr, range.cpu_addr + range.size - 1, - range.pci_addr, - (range.flags & IORESOURCE_PREFETCH) ? - "Prefetch" : ""); - - /* We support only 3 memory ranges */ - if (memno >= 3) { - pr_info(" \\--> Skipped (too many) !\n"); - continue; - } - /* Handles ISA memory hole space here */ - if (range.pci_addr == 0) { - isa_mb = range.cpu_addr; - isa_hole = memno; - if (primary || isa_mem_base == 0) - isa_mem_base = range.cpu_addr; - hose->isa_mem_phys = range.cpu_addr; - hose->isa_mem_size = range.size; - } - - /* We get the PCI/Mem offset from the first range or - * the, current one if the offset came from an ISA - * hole. If they don't match, bugger. - */ - if (memno == 0 || - (isa_hole >= 0 && range.pci_addr != 0 && - hose->pci_mem_offset == isa_mb)) - hose->pci_mem_offset = range.cpu_addr - - range.pci_addr; - else if (range.pci_addr != 0 && - hose->pci_mem_offset != range.cpu_addr - - range.pci_addr) { - pr_info(" \\--> Skipped (offset mismatch) !\n"); - continue; - } - - /* Build resource */ - res = &hose->mem_resources[memno++]; - break; - } - if (res != NULL) { - res->name = dev->full_name; - res->flags = range.flags; - res->start = range.cpu_addr; - res->end = range.cpu_addr + range.size - 1; - res->parent = res->child = res->sibling = NULL; - } - } - - /* If there's an ISA hole and the pci_mem_offset is -not- matching - * the ISA hole offset, then we need to remove the ISA hole from - * the resource list for that brige - */ - if (isa_hole >= 0 && hose->pci_mem_offset != isa_mb) { - unsigned int next = isa_hole + 1; - pr_info(" Removing ISA hole at 0x%016llx\n", isa_mb); - if (next < memno) - memmove(&hose->mem_resources[isa_hole], - &hose->mem_resources[next], - sizeof(struct resource) * (memno - next)); - hose->mem_resources[--memno].flags = 0; - } -} - -/* Display the domain number in /proc */ -int pci_proc_domain(struct pci_bus *bus) -{ - return pci_domain_nr(bus); -} - -/* This header fixup will do the resource fixup for all devices as they are - * probed, but not for bridge ranges - */ -static void pcibios_fixup_resources(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - int i; - - if (!hose) { - pr_err("No host bridge for PCI dev %s !\n", - pci_name(dev)); - return; - } - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - struct resource *res = dev->resource + i; - if (!res->flags) - continue; - if (res->start == 0) { - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]", - pci_name(dev), i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags); - pr_debug("is unassigned\n"); - res->end -= res->start; - res->start = 0; - res->flags |= IORESOURCE_UNSET; - continue; - } - - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n", - pci_name(dev), i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags); - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources); - -int pcibios_device_add(struct pci_dev *dev) -{ - dev->irq = of_irq_parse_and_map_pci(dev, 0, 0); - - return 0; -} - -/* - * Reparent resource children of pr that conflict with res - * under res, and make res replace those children. - */ -static int __init reparent_resources(struct resource *parent, - struct resource *res) -{ - struct resource *p, **pp; - struct resource **firstpp = NULL; - - for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) { - if (p->end < res->start) - continue; - if (res->end < p->start) - break; - if (p->start < res->start || p->end > res->end) - return -1; /* not completely contained */ - if (firstpp == NULL) - firstpp = pp; - } - if (firstpp == NULL) - return -1; /* didn't find any conflicting entries? */ - res->parent = parent; - res->child = *firstpp; - res->sibling = *pp; - *firstpp = res; - *pp = NULL; - for (p = res->child; p != NULL; p = p->sibling) { - p->parent = res; - pr_debug("PCI: Reparented %s [%llx..%llx] under %s\n", - p->name, - (unsigned long long)p->start, - (unsigned long long)p->end, res->name); - } - return 0; -} - -/* - * Handle resources of PCI devices. If the world were perfect, we could - * just allocate all the resource regions and do nothing more. It isn't. - * On the other hand, we cannot just re-allocate all devices, as it would - * require us to know lots of host bridge internals. So we attempt to - * keep as much of the original configuration as possible, but tweak it - * when it's found to be wrong. - * - * Known BIOS problems we have to work around: - * - I/O or memory regions not configured - * - regions configured, but not enabled in the command register - * - bogus I/O addresses above 64K used - * - expansion ROMs left enabled (this may sound harmless, but given - * the fact the PCI specs explicitly allow address decoders to be - * shared between expansion ROMs and other resource regions, it's - * at least dangerous) - * - * Our solution: - * (1) Allocate resources for all buses behind PCI-to-PCI bridges. - * This gives us fixed barriers on where we can allocate. - * (2) Allocate resources for all enabled devices. If there is - * a collision, just mark the resource as unallocated. Also - * disable expansion ROMs during this step. - * (3) Try to allocate resources for disabled devices. If the - * resources were assigned correctly, everything goes well, - * if they weren't, they won't disturb allocation of other - * resources. - * (4) Assign new addresses to resources which were either - * not configured at all or misconfigured. If explicitly - * requested by the user, configure expansion ROM address - * as well. - */ - -static void pcibios_allocate_bus_resources(struct pci_bus *bus) -{ - struct pci_bus *b; - int i; - struct resource *res, *pr; - - pr_debug("PCI: Allocating bus resources for %04x:%02x...\n", - pci_domain_nr(bus), bus->number); - - pci_bus_for_each_resource(bus, res, i) { - if (!res || !res->flags - || res->start > res->end || res->parent) - continue; - if (bus->parent == NULL) - pr = (res->flags & IORESOURCE_IO) ? - &ioport_resource : &iomem_resource; - else { - /* Don't bother with non-root busses when - * re-assigning all resources. We clear the - * resource flags as if they were colliding - * and as such ensure proper re-allocation - * later. - */ - pr = pci_find_parent_resource(bus->self, res); - if (pr == res) { - /* this happens when the generic PCI - * code (wrongly) decides that this - * bridge is transparent -- paulus - */ - continue; - } - } - - pr_debug("PCI: %s (bus %d) bridge rsrc %d: %016llx-%016llx ", - bus->self ? pci_name(bus->self) : "PHB", - bus->number, i, - (unsigned long long)res->start, - (unsigned long long)res->end); - pr_debug("[0x%x], parent %p (%s)\n", - (unsigned int)res->flags, - pr, (pr && pr->name) ? pr->name : "nil"); - - if (pr && !(pr->flags & IORESOURCE_UNSET)) { - struct pci_dev *dev = bus->self; - - if (request_resource(pr, res) == 0) - continue; - /* - * Must be a conflict with an existing entry. - * Move that entry (or entries) under the - * bridge resource and try again. - */ - if (reparent_resources(pr, res) == 0) - continue; - - if (dev && i < PCI_BRIDGE_RESOURCE_NUM && - pci_claim_bridge_resource(dev, - i + PCI_BRIDGE_RESOURCES) == 0) - continue; - - } - pr_warn("PCI: Cannot allocate resource region "); - pr_cont("%d of PCI bridge %d, will remap\n", i, bus->number); - res->start = res->end = 0; - res->flags = 0; - } - - list_for_each_entry(b, &bus->children, node) - pcibios_allocate_bus_resources(b); -} - -static inline void alloc_resource(struct pci_dev *dev, int idx) -{ - struct resource *pr, *r = &dev->resource[idx]; - - pr_debug("PCI: Allocating %s: Resource %d: %016llx..%016llx [%x]\n", - pci_name(dev), idx, - (unsigned long long)r->start, - (unsigned long long)r->end, - (unsigned int)r->flags); - - pr = pci_find_parent_resource(dev, r); - if (!pr || (pr->flags & IORESOURCE_UNSET) || - request_resource(pr, r) < 0) { - pr_warn("PCI: Cannot allocate resource region %d ", idx); - pr_cont("of device %s, will remap\n", pci_name(dev)); - if (pr) - pr_debug("PCI: parent is %p: %016llx-%016llx [%x]\n", - pr, - (unsigned long long)pr->start, - (unsigned long long)pr->end, - (unsigned int)pr->flags); - /* We'll assign a new address later */ - r->flags |= IORESOURCE_UNSET; - r->end -= r->start; - r->start = 0; - } -} - -static void __init pcibios_allocate_resources(int pass) -{ - struct pci_dev *dev = NULL; - int idx, disabled; - u16 command; - struct resource *r; - - for_each_pci_dev(dev) { - pci_read_config_word(dev, PCI_COMMAND, &command); - for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { - r = &dev->resource[idx]; - if (r->parent) /* Already allocated */ - continue; - if (!r->flags || (r->flags & IORESOURCE_UNSET)) - continue; /* Not assigned at all */ - /* We only allocate ROMs on pass 1 just in case they - * have been screwed up by firmware - */ - if (idx == PCI_ROM_RESOURCE) - disabled = 1; - if (r->flags & IORESOURCE_IO) - disabled = !(command & PCI_COMMAND_IO); - else - disabled = !(command & PCI_COMMAND_MEMORY); - if (pass == disabled) - alloc_resource(dev, idx); - } - if (pass) - continue; - r = &dev->resource[PCI_ROM_RESOURCE]; - if (r->flags) { - /* Turn the ROM off, leave the resource region, - * but keep it unregistered. - */ - u32 reg; - pci_read_config_dword(dev, dev->rom_base_reg, ®); - if (reg & PCI_ROM_ADDRESS_ENABLE) { - pr_debug("PCI: Switching off ROM of %s\n", - pci_name(dev)); - r->flags &= ~IORESOURCE_ROM_ENABLE; - pci_write_config_dword(dev, dev->rom_base_reg, - reg & ~PCI_ROM_ADDRESS_ENABLE); - } - } - } -} - -static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - resource_size_t offset; - struct resource *res, *pres; - int i; - - pr_debug("Reserving legacy ranges for domain %04x\n", - pci_domain_nr(bus)); - - /* Check for IO */ - if (!(hose->io_resource.flags & IORESOURCE_IO)) - goto no_io; - offset = (unsigned long)hose->io_base_virt - _IO_BASE; - res = kzalloc(sizeof(struct resource), GFP_KERNEL); - BUG_ON(res == NULL); - res->name = "Legacy IO"; - res->flags = IORESOURCE_IO; - res->start = offset; - res->end = (offset + 0xfff) & 0xfffffffful; - pr_debug("Candidate legacy IO: %pR\n", res); - if (request_resource(&hose->io_resource, res)) { - pr_debug("PCI %04x:%02x Cannot reserve Legacy IO %pR\n", - pci_domain_nr(bus), bus->number, res); - kfree(res); - } - - no_io: - /* Check for memory */ - offset = hose->pci_mem_offset; - pr_debug("hose mem offset: %016llx\n", (unsigned long long)offset); - for (i = 0; i < 3; i++) { - pres = &hose->mem_resources[i]; - if (!(pres->flags & IORESOURCE_MEM)) - continue; - pr_debug("hose mem res: %pR\n", pres); - if ((pres->start - offset) <= 0xa0000 && - (pres->end - offset) >= 0xbffff) - break; - } - if (i >= 3) - return; - res = kzalloc(sizeof(struct resource), GFP_KERNEL); - BUG_ON(res == NULL); - res->name = "Legacy VGA memory"; - res->flags = IORESOURCE_MEM; - res->start = 0xa0000 + offset; - res->end = 0xbffff + offset; - pr_debug("Candidate VGA memory: %pR\n", res); - if (request_resource(pres, res)) { - pr_debug("PCI %04x:%02x Cannot reserve VGA memory %pR\n", - pci_domain_nr(bus), bus->number, res); - kfree(res); - } -} - -void __init pcibios_resource_survey(void) -{ - struct pci_bus *b; - - /* Allocate and assign resources. If we re-assign everything, then - * we skip the allocate phase - */ - list_for_each_entry(b, &pci_root_buses, node) - pcibios_allocate_bus_resources(b); - - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - - /* Before we start assigning unassigned resource, we try to reserve - * the low IO area and the VGA memory area if they intersect the - * bus available resources to avoid allocating things on top of them - */ - list_for_each_entry(b, &pci_root_buses, node) - pcibios_reserve_legacy_regions(b); - - /* Now proceed to assigning things that were left unassigned */ - pr_debug("PCI: Assigning unassigned resources...\n"); - pci_assign_unassigned_resources(); -} - -static void pcibios_setup_phb_resources(struct pci_controller *hose, - struct list_head *resources) -{ - unsigned long io_offset; - struct resource *res; - int i; - - /* Hookup PHB IO resource */ - res = &hose->io_resource; - - /* Fixup IO space offset */ - io_offset = (unsigned long)hose->io_base_virt - isa_io_base; - res->start = (res->start + io_offset) & 0xffffffffu; - res->end = (res->end + io_offset) & 0xffffffffu; - - if (!res->flags) { - pr_warn("PCI: I/O resource not set for host "); - pr_cont("bridge %pOF (domain %d)\n", - hose->dn, hose->global_number); - /* Workaround for lack of IO resource only on 32-bit */ - res->start = (unsigned long)hose->io_base_virt - isa_io_base; - res->end = res->start + IO_SPACE_LIMIT; - res->flags = IORESOURCE_IO; - } - pci_add_resource_offset(resources, res, - (__force resource_size_t)(hose->io_base_virt - _IO_BASE)); - - pr_debug("PCI: PHB IO resource = %016llx-%016llx [%lx]\n", - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned long)res->flags); - - /* Hookup PHB Memory resources */ - for (i = 0; i < 3; ++i) { - res = &hose->mem_resources[i]; - if (!res->flags) { - if (i > 0) - continue; - pr_err("PCI: Memory resource 0 not set for "); - pr_cont("host bridge %pOF (domain %d)\n", - hose->dn, hose->global_number); - - /* Workaround for lack of MEM resource only on 32-bit */ - res->start = hose->pci_mem_offset; - res->end = (resource_size_t)-1LL; - res->flags = IORESOURCE_MEM; - - } - pci_add_resource_offset(resources, res, hose->pci_mem_offset); - - pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n", - i, (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned long)res->flags); - } - - pr_debug("PCI: PHB MEM offset = %016llx\n", - (unsigned long long)hose->pci_mem_offset); - pr_debug("PCI: PHB IO offset = %08lx\n", - (unsigned long)hose->io_base_virt - _IO_BASE); -} - -static void pcibios_scan_phb(struct pci_controller *hose) -{ - LIST_HEAD(resources); - struct pci_bus *bus; - struct device_node *node = hose->dn; - - pr_debug("PCI: Scanning PHB %pOF\n", node); - - pcibios_setup_phb_resources(hose, &resources); - - bus = pci_scan_root_bus(hose->parent, hose->first_busno, - hose->ops, hose, &resources); - if (bus == NULL) { - pr_err("Failed to create bus for PCI domain %04x\n", - hose->global_number); - pci_free_resource_list(&resources); - return; - } - bus->busn_res.start = hose->first_busno; - hose->bus = bus; - - hose->last_busno = bus->busn_res.end; -} - -static int __init pcibios_init(void) -{ - struct pci_controller *hose, *tmp; - int next_busno = 0; - - pr_info("PCI: Probing PCI hardware\n"); - - /* Scan all of the recorded PCI controllers. */ - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - hose->last_busno = 0xff; - pcibios_scan_phb(hose); - if (next_busno <= hose->last_busno) - next_busno = hose->last_busno + 1; - } - pci_bus_count = next_busno; - - /* Call common code to handle resource allocation */ - pcibios_resource_survey(); - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - if (hose->bus) - pci_bus_add_devices(hose->bus); - } - - return 0; -} - -subsys_initcall(pcibios_init); - -static struct pci_controller *pci_bus_to_hose(int bus) -{ - struct pci_controller *hose, *tmp; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - if (bus >= hose->first_busno && bus <= hose->last_busno) - return hose; - return NULL; -} - -/* Provide information on locations of various I/O regions in physical - * memory. Do this on a per-card basis so that we choose the right - * root bridge. - * Note that the returned IO or memory base is a physical address - */ - -long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) -{ - struct pci_controller *hose; - long result = -EOPNOTSUPP; - - hose = pci_bus_to_hose(bus); - if (!hose) - return -ENODEV; - - switch (which) { - case IOBASE_BRIDGE_NUMBER: - return (long)hose->first_busno; - case IOBASE_MEMORY: - return (long)hose->pci_mem_offset; - case IOBASE_IO: - return (long)hose->io_base_phys; - case IOBASE_ISA_IO: - return (long)isa_io_base; - case IOBASE_ISA_MEM: - return (long)isa_mem_base; - } - - return result; -} - -/* - * Null PCI config access functions, for the case when we can't - * find a hose. - */ -#define NULL_PCI_OP(rw, size, type) \ -static int \ -null_##rw##_config_##size(struct pci_dev *dev, int offset, type val) \ -{ \ - return PCIBIOS_DEVICE_NOT_FOUND; \ -} - -static int -null_read_config(struct pci_bus *bus, unsigned int devfn, int offset, - int len, u32 *val) -{ - return PCIBIOS_DEVICE_NOT_FOUND; -} - -static int -null_write_config(struct pci_bus *bus, unsigned int devfn, int offset, - int len, u32 val) -{ - return PCIBIOS_DEVICE_NOT_FOUND; -} - -static struct pci_ops null_pci_ops = { - .read = null_read_config, - .write = null_write_config, -}; - -/* - * These functions are used early on before PCI scanning is done - * and all of the pci_dev and pci_bus structures have been created. - */ -static struct pci_bus * -fake_pci_bus(struct pci_controller *hose, int busnr) -{ - static struct pci_bus bus; - - if (!hose) - pr_err("Can't find hose for PCI bus %d!\n", busnr); - - bus.number = busnr; - bus.sysdata = hose; - bus.ops = hose ? hose->ops : &null_pci_ops; - return &bus; -} - -#define EARLY_PCI_OP(rw, size, type) \ -int early_##rw##_config_##size(struct pci_controller *hose, int bus, \ - int devfn, int offset, type value) \ -{ \ - return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus), \ - devfn, offset, value); \ -} - -EARLY_PCI_OP(read, byte, u8 *) -EARLY_PCI_OP(read, word, u16 *) -EARLY_PCI_OP(read, dword, u32 *) -EARLY_PCI_OP(write, byte, u8) -EARLY_PCI_OP(write, word, u16) -EARLY_PCI_OP(write, dword, u32) - -int early_find_capability(struct pci_controller *hose, int bus, int devfn, - int cap) -{ - return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap); -} diff --git a/arch/microblaze/pci/xilinx_pci.c b/arch/microblaze/pci/xilinx_pci.c deleted file mode 100644 index f4cb86fffcee..000000000000 --- a/arch/microblaze/pci/xilinx_pci.c +++ /dev/null @@ -1,170 +0,0 @@ -/* - * PCI support for Xilinx plbv46_pci soft-core which can be used on - * Xilinx Virtex ML410 / ML510 boards. - * - * Copyright 2009 Roderick Colenbrander - * Copyright 2009 Secret Lab Technologies Ltd. - * - * The pci bridge fixup code was copied from ppc4xx_pci.c and was written - * by Benjamin Herrenschmidt. - * Copyright 2007 Ben. Herrenschmidt <benh@kernel.crashing.org>, IBM Corp. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include <linux/ioport.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/pci.h> -#include <linux/io.h> - -#define XPLB_PCI_ADDR 0x10c -#define XPLB_PCI_DATA 0x110 -#define XPLB_PCI_BUS 0x114 - -#define PCI_HOST_ENABLE_CMD (PCI_COMMAND_SERR | PCI_COMMAND_PARITY | \ - PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY) - -static const struct of_device_id xilinx_pci_match[] = { - { .compatible = "xlnx,plbv46-pci-1.03.a", }, - {} -}; - -/** - * xilinx_pci_fixup_bridge - Block Xilinx PHB configuration. - */ -static void xilinx_pci_fixup_bridge(struct pci_dev *dev) -{ - struct pci_controller *hose; - int i; - - if (dev->devfn || dev->bus->self) - return; - - hose = pci_bus_to_host(dev->bus); - if (!hose) - return; - - if (!of_match_node(xilinx_pci_match, hose->dn)) - return; - - /* Hide the PCI host BARs from the kernel as their content doesn't - * fit well in the resource management - */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; - } - - dev_info(&dev->dev, "Hiding Xilinx plb-pci host bridge resources %s\n", - pci_name(dev)); -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, xilinx_pci_fixup_bridge); - -#ifdef DEBUG -/** - * xilinx_pci_exclude_device - Don't do config access for non-root bus - * - * This is a hack. Config access to any bus other than bus 0 does not - * currently work on the ML510 so we prevent it here. - */ -static int -xilinx_pci_exclude_device(struct pci_controller *hose, u_char bus, u8 devfn) -{ - return (bus != 0); -} - -/** - * xilinx_early_pci_scan - List pci config space for available devices - * - * List pci devices in very early phase. - */ -static void __init xilinx_early_pci_scan(struct pci_controller *hose) -{ - u32 bus = 0; - u32 val, dev, func, offset; - - /* Currently we have only 2 device connected - up-to 32 devices */ - for (dev = 0; dev < 2; dev++) { - /* List only first function number - up-to 8 functions */ - for (func = 0; func < 1; func++) { - pr_info("%02x:%02x:%02x", bus, dev, func); - /* read the first 64 standardized bytes */ - /* Up-to 192 bytes can be list of capabilities */ - for (offset = 0; offset < 64; offset += 4) { - early_read_config_dword(hose, bus, - PCI_DEVFN(dev, func), offset, &val); - if (offset == 0 && val == 0xFFFFFFFF) { - pr_cont("\nABSENT"); - break; - } - if (!(offset % 0x10)) - pr_cont("\n%04x: ", offset); - - pr_cont("%08x ", val); - } - pr_info("\n"); - } - } -} -#else -static void __init xilinx_early_pci_scan(struct pci_controller *hose) -{ -} -#endif - -/** - * xilinx_pci_init - Find and register a Xilinx PCI host bridge - */ -void __init xilinx_pci_init(void) -{ - struct pci_controller *hose; - struct resource r; - void __iomem *pci_reg; - struct device_node *pci_node; - - pci_node = of_find_matching_node(NULL, xilinx_pci_match); - if (!pci_node) - return; - - if (of_address_to_resource(pci_node, 0, &r)) { - pr_err("xilinx-pci: cannot resolve base address\n"); - return; - } - - hose = pcibios_alloc_controller(pci_node); - if (!hose) { - pr_err("xilinx-pci: pcibios_alloc_controller() failed\n"); - return; - } - - /* Setup config space */ - setup_indirect_pci(hose, r.start + XPLB_PCI_ADDR, - r.start + XPLB_PCI_DATA, - INDIRECT_TYPE_SET_CFG_TYPE); - - /* According to the xilinx plbv46_pci documentation the soft-core starts - * a self-init when the bus master enable bit is set. Without this bit - * set the pci bus can't be scanned. - */ - early_write_config_word(hose, 0, 0, PCI_COMMAND, PCI_HOST_ENABLE_CMD); - - /* Set the max latency timer to 255 */ - early_write_config_byte(hose, 0, 0, PCI_LATENCY_TIMER, 0xff); - - /* Set the max bus number to 255, and bus/subbus no's to 0 */ - pci_reg = of_iomap(pci_node, 0); - WARN_ON(!pci_reg); - out_be32(pci_reg + XPLB_PCI_BUS, 0x000000ff); - iounmap(pci_reg); - - /* Register the host bridge with the linux kernel! */ - pci_process_bridge_OF_ranges(hose, pci_node, - INDIRECT_TYPE_SET_CFG_TYPE); - - pr_info("xilinx-pci: Registered PCI host bridge\n"); - xilinx_early_pci_scan(hose); -} diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c index a17d7a8909c4..1b16daaa86ae 100644 --- a/arch/mips/alchemy/common/gpiolib.c +++ b/arch/mips/alchemy/common/gpiolib.c @@ -31,7 +31,7 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/types.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <asm/mach-au1x00/gpio-au1000.h> #include <asm/mach-au1x00/gpio-au1300.h> diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 5b38a802e101..c5dd415254d3 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -9,6 +9,7 @@ #define DISABLE_BRANCH_PROFILING +#define __NO_FORTIFY #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/arch/mips/include/asm/fw/fw.h b/arch/mips/include/asm/fw/fw.h index d0ef8b4892bb..d0494ce4b337 100644 --- a/arch/mips/include/asm/fw/fw.h +++ b/arch/mips/include/asm/fw/fw.h @@ -26,6 +26,6 @@ extern char *fw_getcmdline(void); extern void fw_meminit(void); extern char *fw_getenv(char *name); extern unsigned long fw_getenvl(char *name); -extern void fw_init_early_console(char port); +extern void fw_init_early_console(void); #endif /* __ASM_FW_H_ */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 6caec386ad2f..4678627673df 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index 71a882c8c6eb..f7978d50a2ba 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -56,7 +56,7 @@ void arch_jump_label_transform(struct jump_entry *e, * The branch offset must fit in the instruction's 26 * bit field. */ - WARN_ON((offset >= BIT(25)) || + WARN_ON((offset >= (long)BIT(25)) || (offset < -(long)BIT(25))); insn.j_format.opcode = bc6_op; diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index cfde14b48fd8..f5b2ef979b43 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -145,8 +145,7 @@ LEAF(kexec_smp_wait) * kexec_args[0..3] are used to prepare register values. */ -kexec_args: - EXPORT(kexec_args) +EXPORT(kexec_args) arg0: PTR_WD 0x0 arg1: PTR_WD 0x0 arg2: PTR_WD 0x0 @@ -159,8 +158,7 @@ arg3: PTR_WD 0x0 * their registers a0-a3. secondary_kexec_args[0..3] are used * to prepare register values. */ -secondary_kexec_args: - EXPORT(secondary_kexec_args) +EXPORT(secondary_kexec_args) s_arg0: PTR_WD 0x0 s_arg1: PTR_WD 0x0 s_arg2: PTR_WD 0x0 @@ -171,19 +169,16 @@ kexec_flag: #endif -kexec_start_address: - EXPORT(kexec_start_address) +EXPORT(kexec_start_address) PTR_WD 0x0 .size kexec_start_address, PTRSIZE -kexec_indirection_page: - EXPORT(kexec_indirection_page) +EXPORT(kexec_indirection_page) PTR_WD 0 .size kexec_indirection_page, PTRSIZE relocate_new_kernel_end: -relocate_new_kernel_size: - EXPORT(relocate_new_kernel_size) +EXPORT(relocate_new_kernel_size) PTR_WD relocate_new_kernel_end - relocate_new_kernel .size relocate_new_kernel_size, PTRSIZE diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index 758d5d26aaaa..e420800043b0 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -16,6 +16,7 @@ #include <asm/bootinfo.h> #include <asm/idle.h> #include <asm/reboot.h> +#include <asm/bug.h> #include <loongson.h> #include <boot_param.h> @@ -159,8 +160,17 @@ static int __init mips_reboot_setup(void) #ifdef CONFIG_KEXEC kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); + if (WARN_ON(!kexec_argv)) + return -ENOMEM; + kdump_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); + if (WARN_ON(!kdump_argv)) + return -ENOMEM; + kexec_envp = kmalloc(KEXEC_ENVP_SIZE, GFP_KERNEL); + if (WARN_ON(!kexec_envp)) + return -ENOMEM; + fw_arg1 = KEXEC_ARGV_ADDR; memcpy(kexec_envp, (void *)fw_arg2, KEXEC_ENVP_SIZE); diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c index 25372e62783b..3cd1b408fa1c 100644 --- a/arch/mips/pic32/pic32mzda/early_console.c +++ b/arch/mips/pic32/pic32mzda/early_console.c @@ -27,7 +27,7 @@ #define U_BRG(x) (UART_BASE(x) + 0x40) static void __iomem *uart_base; -static char console_port = -1; +static int console_port = -1; static int __init configure_uart_pins(int port) { @@ -47,7 +47,7 @@ static int __init configure_uart_pins(int port) return 0; } -static void __init configure_uart(char port, int baud) +static void __init configure_uart(int port, int baud) { u32 pbclk; @@ -60,7 +60,7 @@ static void __init configure_uart(char port, int baud) uart_base + PIC32_SET(U_STA(port))); } -static void __init setup_early_console(char port, int baud) +static void __init setup_early_console(int port, int baud) { if (configure_uart_pins(port)) return; @@ -130,16 +130,15 @@ _out: return baud; } -void __init fw_init_early_console(char port) +void __init fw_init_early_console(void) { char *arch_cmdline = pic32_getcmdline(); - int baud = -1; + int baud, port; uart_base = ioremap(PIC32_BASE_UART, 0xc00); baud = get_baud_from_cmdline(arch_cmdline); - if (port == -1) - port = get_port_from_cmdline(arch_cmdline); + port = get_port_from_cmdline(arch_cmdline); if (port == -1) port = EARLY_CONSOLE_PORT; diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 08c46cf122d7..53b227a9074c 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -47,7 +47,7 @@ void __init plat_mem_setup(void) strscpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE); #ifdef CONFIG_EARLY_PRINTK - fw_init_early_console(-1); + fw_init_early_console(); #endif pic32_config_init(); } diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile index 8c3ad76602f3..29c11a06b750 100644 --- a/arch/nios2/boot/Makefile +++ b/arch/nios2/boot/Makefile @@ -20,7 +20,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) -$(obj)/vmImage: $(obj)/vmlinux.gz +$(obj)/vmImage: $(obj)/vmlinux.gz FORCE $(call if_changed,uimage) @$(kecho) 'Kernel: $@ is ready' diff --git a/arch/openrisc/configs/or1ksim_defconfig b/arch/openrisc/configs/or1ksim_defconfig index 6e1e004047c7..0116e465238f 100644 --- a/arch/openrisc/configs/or1ksim_defconfig +++ b/arch/openrisc/configs/or1ksim_defconfig @@ -10,7 +10,8 @@ CONFIG_EXPERT=y # CONFIG_AIO is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_MODULES=y # CONFIG_BLOCK is not set CONFIG_OPENRISC_BUILTIN_DTB="or1ksim" diff --git a/arch/openrisc/configs/simple_smp_defconfig b/arch/openrisc/configs/simple_smp_defconfig index ff49d868e040..b990cb6c9309 100644 --- a/arch/openrisc/configs/simple_smp_defconfig +++ b/arch/openrisc/configs/simple_smp_defconfig @@ -16,7 +16,8 @@ CONFIG_EXPERT=y # CONFIG_AIO is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_MODULES=y # CONFIG_BLOCK is not set CONFIG_OPENRISC_BUILTIN_DTB="simple_smp" diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 4745bb9998bd..6d8492b6e2b8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs) /* kernel/traps.c */ DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); #ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot); DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async); #endif DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7786e3ac7611..8c3862b4c259 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -142,7 +142,7 @@ SECTIONS #endif .data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) { - *(.data.rel.ro*) + *(.data.rel.ro .data.rel.ro.*) } .branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) { diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 43f1c76d48ce..a379b0ce19ff 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; - /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); + /* Initialize tail_call_cnt, to be skipped if we do tail calls. */ + EMIT(PPC_RAW_LI(_R4, 0)); + +#define BPF_TAILCALL_PROLOGUE_SIZE 4 + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); - /* - * Initialize tail_call_cnt in stack frame if we do tail calls. - * Otherwise, put in NOPs so that it can be skipped when we are - * invoked through a tail call. - */ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, - bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - else - EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); -#define BPF_TAILCALL_PROLOGUE_SIZE 16 + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); /* * We need a stack frame, but we don't necessarily need to @@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i))); -} - -void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) -{ - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); - - bpf_jit_emit_common_epilogue(image, ctx); - - /* Tear down our stack frame */ if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + /* Tear down our stack frame */ EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_MTLR(_R0)); +} + +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); + + bpf_jit_emit_common_epilogue(image, ctx); + EMIT(PPC_RAW_BLR()); } @@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array)); EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs))); - EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); /* * if (prog == NULL) @@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE)); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_MTCTR(_R3)); EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); + /* Put tail_call_cnt in r4 */ + EMIT(PPC_RAW_MR(_R4, _R0)); + /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fa78595a6089..593cf09264d8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -317,9 +317,9 @@ config SMP config NR_CPUS int "Maximum number of CPUs (2-512)" depends on SMP - range 2 512 if !SBI_V01 - range 2 32 if SBI_V01 && 32BIT - range 2 64 if SBI_V01 && 64BIT + range 2 512 if !RISCV_SBI_V01 + range 2 32 if RISCV_SBI_V01 && 32BIT + range 2 64 if RISCV_SBI_V01 && 64BIT default "32" if 32BIT default "64" if 64BIT diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index ced0d4e47938..900a50526d77 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -3,6 +3,8 @@ #include "fu540-c000.dtsi" #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/leds/common.h> +#include <dt-bindings/pwm/pwm.h> /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ #define RTCCLK_FREQ 1000000 @@ -42,6 +44,42 @@ compatible = "gpio-restart"; gpios = <&gpio 10 GPIO_ACTIVE_LOW>; }; + + led-controller { + compatible = "pwm-leds"; + + led-d1 { + pwms = <&pwm0 0 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d1"; + }; + + led-d2 { + pwms = <&pwm0 1 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d2"; + }; + + led-d3 { + pwms = <&pwm0 2 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d3"; + }; + + led-d4 { + pwms = <&pwm0 3 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = <LED_COLOR_ID_GREEN>; + max-brightness = <255>; + label = "d4"; + }; + }; }; &uart0 { diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 96fe8def644c..79b3ccd58ff0 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -25,7 +25,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_MMU is not set CONFIG_SOC_CANAAN=y CONFIG_NONPORTABLE=y diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 379740654373..6b80bb13b8ed 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -17,7 +17,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_MMU is not set CONFIG_SOC_CANAAN=y CONFIG_NONPORTABLE=y diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index 1a56eda5ce46..4cf0f297091e 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -22,7 +22,8 @@ CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_MMU is not set CONFIG_SOC_VIRT=y CONFIG_NONPORTABLE=y diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 1b471ff73178..816e753de636 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -23,6 +23,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) #define REG_SC __REG_SEL(sc.d, sc.w) +#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index f74879a8f1ea..e229d7be4b66 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -10,6 +10,7 @@ #include <asm/mmu_context.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> +#include <asm/pgalloc.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -20,7 +21,10 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); -#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_setup() ({ \ + sync_kernel_mappings(efi_mm.pgd); \ + efi_virtmap_load(); \ + }) #define arch_efi_call_virt_teardown() efi_virtmap_unload() #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 947f23d7b6af..59dc12b5b7e8 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) #define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ +static inline void sync_kernel_mappings(pgd_t *pgd) +{ + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); +} + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* Copy kernel mappings */ - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + sync_kernel_mappings(pgd); } return pgd; } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..92ec2d9d7273 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd) return pte_dirty(pmd_pte(pmd)); } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pte_young(pmd_pte(pmd)); diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index d3443be7eedc..3831b638ecab 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); /* Clear IPI for current CPU */ void riscv_clear_ipi(void); +/* Check other CPUs stop or not */ +bool smp_crash_stop_failed(void); + /* Secondary hart entry */ asmlinkage void smp_callin(void); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..186abd146eaf 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -404,6 +404,19 @@ handle_syscall_trace_exit: #ifdef CONFIG_VMAP_STACK handle_kernel_stack_overflow: + /* + * Takes the psuedo-spinlock for the shadow stack, in case multiple + * harts are concurrently overflowing their kernel stacks. We could + * store any value here, but since we're overflowing the kernel stack + * already we only have SP to use as a scratch register. So we just + * swap in the address of the spinlock, as that's definately non-zero. + * + * Pairs with a store_release in handle_bad_stack(). + */ +1: la sp, spin_shadow_stack + REG_AMOSWAP_AQ sp, sp, (sp) + bnez sp, 1b + la sp, shadow_stack addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ee79e6839b86..2d139b724bc8 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -15,6 +15,8 @@ #include <linux/compiler.h> /* For unreachable() */ #include <linux/cpu.h> /* For cpu_down() */ #include <linux/reboot.h> +#include <linux/interrupt.h> +#include <linux/irq.h> /* * kexec_image_info - Print received image details @@ -138,20 +140,35 @@ void machine_shutdown(void) #endif } -/* Override the weak function in kernel/panic.c */ -void crash_smp_send_stop(void) +static void machine_kexec_mask_interrupts(void) { - static int cpus_stopped; + unsigned int i; + struct irq_desc *desc; - /* - * This function can be called twice in panic path, but obviously - * we execute this only once. - */ - if (cpus_stopped) - return; + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); - smp_send_stop(); - cpus_stopped = 1; + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } } /* @@ -169,6 +186,8 @@ machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + pr_info("Starting crashdump kernel...\n"); } @@ -195,6 +214,11 @@ machine_kexec(struct kimage *image) void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; +#ifdef CONFIG_SMP + WARN(smp_crash_stop_failed(), + "Some CPUs may be stale, kdump will be unreliable.\n"); +#endif + if (image->type != KEXEC_TYPE_CRASH) kexec_method = control_code_buffer; else diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index b0c63e8e867e..8955f2432c2d 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -164,6 +164,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + memset(&p->thread.s, 0, sizeof(p->thread.s)); + /* p->thread holds context to be restored by __switch_to() */ if (unlikely(args->fn)) { /* Kernel thread */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index ad76bb59b059..86acd690d529 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -283,6 +283,7 @@ void __init setup_arch(char **cmdline_p) else pr_err("No DTB found in kernel mappings\n"); #endif + early_init_fdt_scan_reserved_mem(); misc_mem_init(); init_resources(); @@ -321,10 +322,11 @@ subsys_initcall(topology_init); void free_initmem(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) - set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), - IS_ENABLED(CONFIG_64BIT) ? - set_memory_rw : set_memory_rw_nx); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); + if (IS_ENABLED(CONFIG_64BIT)) + set_kernel_memory(__init_begin, __init_end, set_memory_nx); + } free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 760a64518c58..8c3b59f1f9b8 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -12,6 +12,7 @@ #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/kexec.h> #include <linux/profile.h> #include <linux/smp.h> #include <linux/sched.h> @@ -22,11 +23,13 @@ #include <asm/sbi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/cpu_ops.h> enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, IPI_MAX @@ -71,6 +74,32 @@ static void ipi_stop(void) wait_for_interrupt(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); + +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_has_hotplug(cpu)) + cpu_ops[cpu]->cpu_stop(); +#endif + + for(;;) + wait_for_interrupt(); +} +#else +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + unreachable(); +} +#endif + static const struct riscv_ipi_ops *ipi_ops __ro_after_init; void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) @@ -124,8 +153,9 @@ void arch_irq_work_raise(void) void handle_IPI(struct pt_regs *regs) { - unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; - unsigned long *stats = ipi_data[smp_processor_id()].stats; + unsigned int cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[cpu].bits; + unsigned long *stats = ipi_data[cpu].stats; riscv_clear_ipi(); @@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs) ipi_stop(); } + if (ops & (1 << IPI_CPU_CRASH_STOP)) { + ipi_cpu_crash_stop(cpu, get_irq_regs()); + } + if (ops & (1 << IPI_IRQ_WORK)) { stats[IPI_IRQ_WORK]++; irq_work_run(); @@ -176,6 +210,7 @@ static const char * const ipi_names[] = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", }; @@ -235,6 +270,64 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + cpumask_t mask; + unsigned long timeout; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); + + pr_crit("SMP: stopping secondary CPUs\n"); + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + void smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f3e96d60a2ff..7abd8e4c4df6 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -221,11 +221,29 @@ asmlinkage unsigned long get_overflow_stack(void) OVERFLOW_STACK_SIZE; } +/* + * A pseudo spinlock to protect the shadow stack from being used by multiple + * harts concurrently. This isn't a real spinlock because the lock side must + * be taken without a valid stack and only a single register, it's only taken + * while in the process of panicing anyway so the performance and error + * checking a proper spinlock gives us doesn't matter. + */ +unsigned long spin_shadow_stack; + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + /* + * We're done with the shadow stack by this point, as we're on the + * overflow stack. Tell any other concurrent overflowing harts that + * they can proceed with panicing by releasing the pseudo-spinlock. + * + * This pairs with an amoswap.aq in handle_kernel_stack_overflow. + */ + smp_store_release(&spin_shadow_stack, 0); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index f2e065671e4d..06e6b27f3bcc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,6 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) @@ -28,9 +29,12 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) obj-y += vdso.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +ifneq ($(filter vgettimeofday, $(vdso-syms)),) +CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY +endif # Disable -pg to prevent insert call site -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 01d94aae5bf5..150b1a572e61 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -68,9 +68,11 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; +#ifdef HAS_VGETTIMEOFDAY __vdso_gettimeofday; __vdso_clock_gettime; __vdso_clock_getres; +#endif __vdso_getcpu; __vdso_flush_icache; local: *; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b56a0a75533f..50a1b6edd491 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -262,7 +262,6 @@ static void __init setup_bootmem(void) memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); } - early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 318fce77601d..48a11deb502c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -73,6 +73,7 @@ config S390 select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MEM_ENCRYPT + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_MEMORY @@ -568,8 +569,7 @@ config EXPOLINE_FULL endchoice config RELOCATABLE - bool "Build a relocatable kernel" - default y + def_bool y help This builds a kernel image that retains relocation information so it can be loaded at an arbitrary address. @@ -578,10 +578,11 @@ config RELOCATABLE bootup process. The relocations make the kernel image about 15% larger (compressed 10%), but are discarded at runtime. + Note: this option exists only for documentation purposes, please do + not remove it. config RANDOMIZE_BASE bool "Randomize the address of the kernel image (KASLR)" - depends on RELOCATABLE default y help In support of Kernel Address Space Layout Randomization (KASLR), diff --git a/arch/s390/Makefile b/arch/s390/Makefile index de6d8b2ea4d8..b3235ab0ace8 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -14,10 +14,8 @@ KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 -ifeq ($(CONFIG_RELOCATABLE),y) KBUILD_CFLAGS += -fPIE LDFLAGS_vmlinux := -pie -endif aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 883357a211a3..d52c3e2e16bc 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -37,9 +37,8 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o -obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6e7f01ca53e6..47ca3264c023 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -291,8 +291,7 @@ void startup_kernel(void) clear_bss_section(); copy_bootdata(); - if (IS_ENABLED(CONFIG_RELOCATABLE)) - handle_relocs(__kaslr_offset); + handle_relocs(__kaslr_offset); if (__kaslr_offset) { /* diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config new file mode 100644 index 000000000000..39227b4511af --- /dev/null +++ b/arch/s390/configs/btf.config @@ -0,0 +1 @@ +CONFIG_DEBUG_INFO_BTF=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 2a827002934b..63807bd0b536 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -723,52 +723,42 @@ CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_HCTR2=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3_GENERIC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@ -779,6 +769,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -797,7 +797,6 @@ CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_HEADERS_INSTALL=y CONFIG_DEBUG_SECTION_MISMATCH=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index fb780e80e4c8..4f9a98247442 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -707,53 +707,43 @@ CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_HCTR2=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_OFB=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3_GENERIC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@ -764,6 +754,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -781,7 +781,6 @@ CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config new file mode 100644 index 000000000000..700a8b25c3ff --- /dev/null +++ b/arch/s390/configs/kasan.config @@ -0,0 +1,3 @@ +CONFIG_KASAN=y +CONFIG_KASAN_INLINE=y +CONFIG_KASAN_VMALLOC=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index a5576b8d4081..5fe9948be644 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -74,7 +74,6 @@ CONFIG_PRINTK_TIME=y # CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f1cb9391190d..11e901286414 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 87be3e855bf7..c907f747d2a0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -199,7 +199,16 @@ unsigned long __get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) -register unsigned long current_stack_pointer asm("r15"); +/* avoid using global register due to gcc bug in versions < 8.4 */ +#define current_stack_pointer (__current_stack_pointer()) + +static __always_inline unsigned long __current_stack_pointer(void) +{ + unsigned long sp; + + asm volatile("lgr %0,15" : "=d" (sp)); + return sp; +} static __always_inline unsigned short stap(void) { diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd74fe664ed1..e4ef67e4da0a 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,7 +46,7 @@ struct save_area { u64 fprs[16]; u32 fpc; u32 prefix; - u64 todpreg; + u32 todpreg; u64 timer; u64 todcmp; u64 vxrs_low[16]; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 45d4b8182b07..bc491a73815c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1207,6 +1207,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, return 0; } +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); + static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) { struct kvm_s390_vm_tod_clock gtod; @@ -1216,7 +1218,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) return -EINVAL; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", gtod.epoch_idx, gtod.tod); @@ -1247,7 +1249,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) sizeof(gtod.tod))) return -EFAULT; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); return 0; } @@ -1259,6 +1261,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) if (attr->flags) return -EINVAL; + mutex_lock(&kvm->lock); + /* + * For protected guests, the TOD is managed by the ultravisor, so trying + * to change it will never bring the expected results. + */ + if (kvm_s390_pv_is_protected(kvm)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + switch (attr->attr) { case KVM_S390_VM_TOD_EXT: ret = kvm_s390_set_tod_ext(kvm, attr); @@ -1273,6 +1285,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) ret = -ENXIO; break; } + +out_unlock: + mutex_unlock(&kvm->lock); return ret; } @@ -4377,13 +4392,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t preempt_enable(); } -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) -{ - mutex_lock(&kvm->lock); - __kvm_s390_set_tod_clock(kvm, gtod); - mutex_unlock(&kvm->lock); -} - int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { if (!mutex_trylock(&kvm->lock)) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f6fd668f887e..4755492dfabc 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -363,7 +363,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index c50c1645c0ae..ded1af2ddae9 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -126,7 +126,7 @@ int kvm_s390_pci_aen_init(u8 nisc) return -EPERM; mutex_lock(&aift->aift_lock); - aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev), + aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *), GFP_KERNEL); if (!aift->kzdev) { rc = -ENOMEM; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 94138f8f0c1c..ace2541ababd 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -546,8 +546,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) scb_s->eca |= scb_o->eca & ECA_CEI; /* Epoch Extension */ - if (test_kvm_facility(vcpu->kvm, 139)) + if (test_kvm_facility(vcpu->kvm, 139)) { scb_s->ecd |= scb_o->ecd & ECD_MEF; + scb_s->epdx = scb_o->epdx; + } /* etoken */ if (test_kvm_facility(vcpu->kvm, 156)) diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig index 619c18699459..376e95fa77bc 100644 --- a/arch/sh/configs/rsk7201_defconfig +++ b/arch/sh/configs/rsk7201_defconfig @@ -10,7 +10,8 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_AIO is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_PROFILING=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index d00fafc021e1..1d5fd67a3949 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -11,7 +11,8 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_PROFILING=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 122216123e63..78e0e7be57ee 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -21,7 +21,8 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y # CONFIG_ELF_CORE is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_PROFILING=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index c0b6f40d01cc..e078b193a78a 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -9,7 +9,8 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH7706=y CONFIG_MEMORY_START=0x0c000000 diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 32ec6eb1eabc..aa353dff7f19 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -20,7 +20,8 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_KALLSYMS_ALL=y -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_PROFILING=y CONFIG_KPROBES=y CONFIG_MODULES=y diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index a779418ceba9..3bc9736bddb1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd) return pte_dirty(pte); } +#define pmd_young pmd_young static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c index 0224fcb36e22..427dd5a61a38 100644 --- a/arch/um/kernel/kmsg_dump.c +++ b/arch/um/kernel/kmsg_dump.c @@ -16,20 +16,26 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, struct console *con; unsigned long flags; size_t len = 0; + int cookie; - /* only dump kmsg when no console is available */ - if (!console_trylock()) - return; + /* + * If no consoles are available to output crash information, dump + * the kmsg buffer to stdout. + */ - for_each_console(con) { - if(strcmp(con->name, "tty") == 0 && - (con->flags & (CON_ENABLED | CON_CONSDEV)) != 0) { + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { + /* + * The ttynull console and disabled consoles are ignored + * since they cannot output. All other consoles are + * expected to output the crash information. + */ + if (strcmp(con->name, "ttynull") != 0 && + (console_srcu_read_flags(con) & CON_ENABLED)) { break; } } - - console_unlock(); - + console_srcu_read_unlock(cookie); if (con) return; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 67745ceab0db..7d11f718ff0c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -81,6 +81,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9860ca5979f8..9e38ffaadb5d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -83,7 +83,7 @@ cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE $(call if_changed,image) - @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')' + @$(kecho) 'Kernel: $@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 311eae30e089..6976416b2c9f 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -210,11 +210,10 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, pgprot_decrypted(vma->vm_page_prot)); } } else if (sym_offset == image->sym_hvclock_page) { - struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); + pfn = hv_get_tsc_pfn(); - if (tsc_pg && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) - return vmf_insert_pfn(vma, vmf->address, - virt_to_phys(tsc_pg) >> PAGE_SHIFT); + if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) + return vmf_insert_pfn(vma, vmf->address, pfn); } else if (sym_offset == image->sym_timens_page) { struct page *timens_page = find_timens_vvar_page(vma); diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 8b70237c33f7..d6f3703e4119 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -861,8 +861,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) pmu_enabled = cpuc->enabled; cpuc->enabled = 0; - /* stop everything (includes BRS) */ - amd_pmu_disable_all(); + amd_brs_disable_all(); /* Drain BRS is in use (could be inactive) */ if (cpuc->lbr_users) @@ -873,7 +872,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) cpuc->enabled = pmu_enabled; if (pmu_enabled) - amd_pmu_enable_all(0); + amd_brs_enable_all(); return amd_pmu_adjust_nmi_window(handled); } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index d568afc705d2..83f15fe411b3 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -553,6 +553,7 @@ static void uncore_clean_online(void) hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { hlist_del(&uncore->node); + kfree(uncore->events); kfree(uncore); } } diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 03bbcc2fa2ff..35936188db01 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -24,7 +24,7 @@ struct p4_event_bind { unsigned int escr_msr[2]; /* ESCR MSR for this event */ unsigned int escr_emask; /* valid ESCR EventMask bits */ unsigned int shared; /* event is shared across threads */ - char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ + signed char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ }; struct p4_pebs_bind { diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 82ef87e9a897..42a55794004a 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1263,6 +1263,15 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages) if (1 << order != nr_pages) goto out; + /* + * Some processors cannot always support single range for more than + * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might + * also be affected, so for now rather than trying to keep track of + * which ones, just disable it for all. + */ + if (nr_pages > 1) + goto out; + buf->single = true; buf->nr_pages = nr_pages; ret = 0; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 29774126e931..41ef036ebb7b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; int ret; ret = hv_common_cpu_init(cpu); @@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; - if (!*hvp) { - if (hv_root_partition) { - /* - * For root partition we get the hypervisor provided VP assist - * page, instead of allocating a new page. - */ - rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - *hvp = memremap(msr.pfn << - HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, - PAGE_SIZE, MEMREMAP_WB); - } else { - /* - * The VP assist page is an "overlay" page (see Hyper-V TLFS's - * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed - * out to make sure we always write the EOI MSR in - * hv_apic_eoi_write() *after* the EOI optimization is disabled - * in hv_cpu_die(), otherwise a CPU may not be stopped in the - * case of CPU offlining and the VM will hang. - */ + if (hv_root_partition) { + /* + * For root partition we get the hypervisor provided VP assist + * page, instead of allocating a new page. + */ + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, + PAGE_SIZE, MEMREMAP_WB); + } else { + /* + * The VP assist page is an "overlay" page (see Hyper-V TLFS's + * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed + * out to make sure we always write the EOI MSR in + * hv_apic_eoi_write() *after* the EOI optimization is disabled + * in hv_cpu_die(), otherwise a CPU may not be stopped in the + * case of CPU offlining and the VM will hang. + */ + if (!*hvp) *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); - if (*hvp) - msr.pfn = vmalloc_to_pfn(*hvp); - } - WARN_ON(!(*hvp)); - if (*hvp) { - msr.enable = 1; - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - } + if (*hvp) + msr.pfn = vmalloc_to_pfn(*hvp); + + } + if (!WARN_ON(!(*hvp))) { + msr.enable = 1; + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); } return hyperv_init_ghcb(); @@ -444,7 +442,7 @@ void __init hyperv_init(void) if (hv_root_partition) { struct page *pg; - void *src, *dst; + void *src; /* * For the root partition, the hypervisor will set up its @@ -459,13 +457,13 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); pg = vmalloc_to_page(hv_hypercall_pg); - dst = kmap_local_page(pg); src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, MEMREMAP_WB); - BUG_ON(!(src && dst)); - memcpy(dst, src, HV_HYP_PAGE_SIZE); + BUG_ON(!src); + memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE); memunmap(src); - kunmap_local(dst); + + hv_remap_tsc_clocksource(); } else { hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -537,8 +535,7 @@ common_free: void hyperv_cleanup(void) { union hv_x64_msr_hypercall_contents hypercall_msr; - - unregister_syscore_ops(&hv_syscore_ops); + union hv_reference_tsc_msr tsc_msr; /* Reset our OS id */ wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); @@ -552,12 +549,14 @@ void hyperv_cleanup(void) hv_hypercall_pg = NULL; /* Reset the hypercall page */ - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.as_uint64 = hv_get_register(HV_X64_MSR_HYPERCALL); + hypercall_msr.enable = 0; + hv_set_register(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); /* Reset the TSC page */ - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); + tsc_msr.as_uint64 = hv_get_register(HV_X64_MSR_REFERENCE_TSC); + tsc_msr.enable = 0; + hv_set_register(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); } void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b71f4f2ecdd5..b2da7cb64b31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -305,6 +305,9 @@ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ + +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 3089ec352743..6d9368ea3701 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -374,11 +374,20 @@ struct hv_nested_enlightenments_control { struct hv_vp_assist_page { __u32 apic_assist; __u32 reserved1; - __u64 vtl_control[3]; + __u32 vtl_entry_reason; + __u32 vtl_reserved; + __u64 vtl_ret_x64rax; + __u64 vtl_ret_x64rcx; struct hv_nested_enlightenments_control nested_control; __u8 enlighten_vmentry; __u8 reserved2[7]; __u64 current_nested_vmcs; + __u8 synthetic_time_unhalted_timer_expired; + __u8 reserved3[7]; + __u8 virtualization_fault_information[40]; + __u8 reserved4[8]; + __u8 intercept_message[256]; + __u8 vtl_ret_actions[256]; } __packed; struct hv_enlightened_vmcs { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7551b6f9c31c..f05ebaa26f0f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -501,7 +501,12 @@ struct kvm_pmc { bool intr; }; +/* More counters may conflict with other existing Architectural MSRs */ +#define KVM_INTEL_PMC_MAX_GENERIC 8 +#define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1) +#define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1) #define KVM_PMC_MAX_FIXED 3 +#define KVM_AMD_PMC_MAX_GENERIC 6 struct kvm_pmu { unsigned nr_arch_gp_counters; unsigned nr_arch_fixed_counters; @@ -516,7 +521,7 @@ struct kvm_pmu { u64 reserved_bits; u64 raw_event_mask; u8 version; - struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; + struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 10ac52705892..4a2af82553e4 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -535,6 +535,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -640,9 +645,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c936ce9f0c47..dfdb103ae4f6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); -extern void write_spec_ctrl_current(u64 val, bool force); +extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5059799bebe3..286a71810f9e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd) return pmd_flags(pmd) & _PAGE_DIRTY; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; @@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void) return true; } +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 60ece592b220..dbb38a6b4dfb 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -37,7 +37,7 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); * rsi = lockval (second argument) * rdx = internal variable (set to 0) */ -asm (".pushsection .spinlock.text;" +asm (".pushsection .spinlock.text, \"ax\";" ".globl " PV_UNLOCK ";" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 5393babc0598..cb0386fc4dc3 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -13,7 +13,7 @@ * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); +extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest); /** * x86_spec_ctrl_set_guest - Set speculation control registers for the guest @@ -24,9 +24,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, true); } /** @@ -38,9 +38,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, false); } /* AMD specific Speculative Store Bypass MSR data */ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index cb50589a7102..437308004ef2 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -19,7 +19,6 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> #include <asm/tdx.h> -#include "../kvm/vmx/vmx.h" #ifdef CONFIG_XEN #include <xen/interface/xen.h> @@ -108,9 +107,4 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); - - if (IS_ENABLED(CONFIG_KVM_INTEL)) { - BLANK(); - OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); - } } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 860b60273df3..c75d75b9f11a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -770,8 +770,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } -#define MSR_AMD64_DE_CFG 0xC0011029 - static void init_amd_ln(struct cpuinfo_x86 *c) { /* @@ -965,8 +963,8 @@ static void init_amd(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index da7c361f47e0..6daf84229548 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); static DEFINE_MUTEX(spec_ctrl_mutex); +/* Update SPEC_CTRL MSR and its cached copy unconditionally */ +static void update_spec_ctrl(u64 val) +{ + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -void write_spec_ctrl_current(u64 val, bool force) +void update_spec_ctrl_cond(u64 val) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; @@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force) * When KERNEL_IBRS this MSR is written on return-to-user, unless * forced the update can be delayed until that time. */ - if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) wrmsrl(MSR_IA32_SPEC_CTRL, val); } @@ -196,22 +203,15 @@ void __init check_bugs(void) } /* - * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is - * done in vmenter.S. + * NOTE: This function is *only* called for SVM, since Intel uses + * MSR_IA32_SPEC_CTRL for SSBD. */ void -x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) +x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); + u64 guestval, hostval; struct thread_info *ti = current_thread_info(); - if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - if (hostval != guestval) { - msrval = setguest ? guestval : hostval; - wrmsrl(MSR_IA32_SPEC_CTRL, msrval); - } - } - /* * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. @@ -1335,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) if (ia32_cap & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -1457,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } switch (mode) { @@ -1571,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); - write_spec_ctrl_current(val, true); + update_spec_ctrl(val); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1804,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -2055,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 21fd425088fe..c393b8773ace 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -326,8 +326,8 @@ static void init_hygon(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 831613959a92..46668e255421 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -475,6 +475,12 @@ static bool __init ms_hyperv_x2apic_available(void) * (logically) generates MSIs directly to the system APIC irq domain. * There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the * pci-hyperv host bridge. + * + * Note: for a Hyper-V root partition, this will always return false. + * The hypervisor doesn't expose these HYPERV_CPUID_VIRT_STACK_* cpuids by + * default, they are implemented as intercepts by the Windows Hyper-V stack. + * Even a nested root partition (L2 root) will not get them because the + * nested (L1) hypervisor filters them out. */ static bool __init ms_hyperv_msi_ext_dest_id(void) { diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index ebe79d60619f..da8b8ea6b063 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -356,6 +356,9 @@ static int sgx_validate_offset_length(struct sgx_encl *encl, if (!length || !IS_ALIGNED(length, PAGE_SIZE)) return -EINVAL; + if (offset + length < offset) + return -EINVAL; + if (offset + length - PAGE_SIZE >= encl->size) return -EINVAL; diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index ec7bbac3a9f2..8009c8346d8f 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -58,24 +58,6 @@ static void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool tsx_ctrl_is_supported(void) -{ - u64 ia32_cap = x86_read_arch_cap_msr(); - - /* - * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this - * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. - * - * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a - * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES - * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get - * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, - * tsx= cmdline requests will do nothing on CPUs without - * MSR_IA32_TSX_CTRL support. - */ - return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); -} - static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) { if (boot_cpu_has_bug(X86_BUG_TAA)) @@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void) rdmsrl(MSR_TSX_FORCE_ABORT, msr); msr |= MSR_TFA_TSX_CPUID_CLEAR; wrmsrl(MSR_TSX_FORCE_ABORT, msr); - } else if (tsx_ctrl_is_supported()) { + } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) { rdmsrl(MSR_IA32_TSX_CTRL, msr); msr |= TSX_CTRL_CPUID_CLEAR; wrmsrl(MSR_IA32_TSX_CTRL, msr); @@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void) u64 mcu_opt_ctrl; /* Check if RTM_ALLOW exists */ - if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || + if (!boot_cpu_has_bug(X86_BUG_TAA) || + !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) || !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) return; @@ -191,7 +174,20 @@ void __init tsx_init(void) return; } - if (!tsx_ctrl_is_supported()) { + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) { + setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL); + } else { tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; return; } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3b28c5b25e12..d00db56a8868 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -605,9 +605,9 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal) if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); save_fpregs_to_fpstate(dst_fpu); + fpregs_unlock(); if (!(clone_flags & CLONE_THREAD)) fpu_inherit_perms(dst_fpu); - fpregs_unlock(); /* * Children never inherit PASID state. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c21b7347a26d..e436c9c1ef3b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr, false); + update_spec_ctrl_cond(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 178015a820f0..d3fdec706f1d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -15,6 +15,7 @@ #include <linux/context_tracking.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> +#include <linux/kmsan.h> #include <linux/spinlock.h> #include <linux/kprobes.h> #include <linux/uaccess.h> @@ -301,6 +302,12 @@ static noinstr bool handle_bug(struct pt_regs *regs) { bool handled = false; + /* + * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() + * is a rare case that uses @regs without passing them to + * irqentry_enter(). + */ + kmsan_unpoison_entry_regs(regs); if (!is_valid_bugaddr(regs->ip)) return handled; diff --git a/arch/x86/kvm/.gitignore b/arch/x86/kvm/.gitignore new file mode 100644 index 000000000000..615d6ff35c00 --- /dev/null +++ b/arch/x86/kvm/.gitignore @@ -0,0 +1,2 @@ +/kvm-asm-offsets.s +/kvm-asm-offsets.h diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 30f244b64523..f453a0f96e24 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -34,3 +34,15 @@ endif obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_AMD) += kvm-amd.o + +AFLAGS_svm/vmenter.o := -iquote $(obj) +$(obj)/svm/vmenter.o: $(obj)/kvm-asm-offsets.h + +AFLAGS_vmx/vmenter.o := -iquote $(obj) +$(obj)/vmx/vmenter.o: $(obj)/kvm-asm-offsets.h + +$(obj)/kvm-asm-offsets.h: $(obj)/kvm-asm-offsets.s FORCE + $(call filechk,offsets,__KVM_ASM_OFFSETS_H__) + +targets += kvm-asm-offsets.s +clean-files += kvm-asm-offsets.h diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c new file mode 100644 index 000000000000..24a710d37323 --- /dev/null +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. + */ +#define COMPILE_OFFSETS + +#include <linux/kbuild.h> +#include "vmx/vmx.h" +#include "svm/svm.h" + +static void __used common(void) +{ + if (IS_ENABLED(CONFIG_KVM_AMD)) { + BLANK(); + OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); + OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); + OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl); + OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); + OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); + OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa); + } + + if (IS_ENABLED(CONFIG_KVM_INTEL)) { + BLANK(); + OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); + } +} diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f81539061d6..b6f96d47e596 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2443,6 +2443,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, { bool list_unstable, zapped_root = false; + lockdep_assert_held_write(&kvm->mmu_lock); trace_kvm_mmu_prepare_zap_page(sp); ++kvm->stat.mmu_shadow_zapped; *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); @@ -4262,14 +4263,14 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; - r = make_mmu_pages_available(vcpu); - if (r) - goto out_unlock; - - if (is_tdp_mmu_fault) + if (is_tdp_mmu_fault) { r = kvm_tdp_mmu_map(vcpu, fault); - else + } else { + r = make_mmu_pages_available(vcpu); + if (r) + goto out_unlock; r = __direct_map(vcpu, fault); + } out_unlock: if (is_tdp_mmu_fault) @@ -6056,7 +6057,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); - kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_begin(kvm, 0, -1ul); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); @@ -6070,7 +6071,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end - gfn_start); - kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_end(kvm, 0, -1ul); write_unlock(&kvm->mmu_lock); } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index d9b9a0f0db17..de1fd7369736 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -56,7 +56,7 @@ static const struct x86_cpu_id vmx_icl_pebs_cpu[] = { * code. Each pmc, stored in kvm_pmc.idx field, is unique across * all perf counters (both gp and fixed). The mapping relationship * between pmc and perf counters is as the following: - * * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters + * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters * [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed * * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H * and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 4c620999d230..995bc0f90759 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1091,6 +1091,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) static void nested_svm_triple_fault(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + + if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN)) + return; + + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN); } @@ -1125,6 +1131,9 @@ void svm_free_nested(struct vcpu_svm *svm) if (!svm->nested.initialized) return; + if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr)) + svm_switch_vmcb(svm, &svm->vmcb01); + svm_vcpu_free_msrpm(svm->nested.msrpm); svm->nested.msrpm = NULL; @@ -1143,9 +1152,6 @@ void svm_free_nested(struct vcpu_svm *svm) svm->nested.initialized = false; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. - */ void svm_leave_nested(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index b68956299fa8..9d65cd095691 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC); + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > AMD64_NUM_COUNTERS_CORE); + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC); - for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) { + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) { + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) { struct kvm_pmc *pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 28064060413a..efaaef2b7ae1 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -196,7 +196,7 @@ static void sev_asid_free(struct kvm_sev_info *sev) __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { - sd = per_cpu(svm_data, cpu); + sd = per_cpu_ptr(&svm_data, cpu); sd->sev_vmcbs[sev->asid] = NULL; } @@ -605,7 +605,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->dr6 = svm->vcpu.arch.dr6; pr_debug("Virtual Machine Save Area (VMSA):\n"); - print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); return 0; } @@ -2600,7 +2600,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 58f0077d9357..ce362e88a567 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -245,7 +245,7 @@ struct kvm_ldttss_desc { u32 zero1; } __attribute__((packed)); -DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); +DEFINE_PER_CPU(struct svm_cpu_data, svm_data); /* * Only MSR_TSC_AUX is switched via the user return hook. EFER is switched via @@ -346,12 +346,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return 0; } -static int is_external_interrupt(u32 info) -{ - info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; - return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); -} - static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -581,12 +575,7 @@ static int svm_hardware_enable(void) pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); return -EINVAL; } - sd = per_cpu(svm_data, me); - if (!sd) { - pr_err("%s: svm_data is NULL on %d\n", __func__, me); - return -EINVAL; - } - + sd = per_cpu_ptr(&svm_data, me); sd->asid_generation = 1; sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; sd->next_asid = sd->max_asid + 1; @@ -597,7 +586,7 @@ static int svm_hardware_enable(void) wrmsrl(MSR_EFER, efer | EFER_SVME); - wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); + wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa); if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { /* @@ -646,42 +635,37 @@ static int svm_hardware_enable(void) static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - if (!sd) + if (!sd->save_area) return; - per_cpu(svm_data, cpu) = NULL; kfree(sd->sev_vmcbs); __free_page(sd->save_area); - kfree(sd); + sd->save_area_pa = 0; + sd->save_area = NULL; } static int svm_cpu_init(int cpu) { - struct svm_cpu_data *sd; + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); int ret = -ENOMEM; - sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); - if (!sd) - return ret; - sd->cpu = cpu; + memset(sd, 0, sizeof(struct svm_cpu_data)); sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!sd->save_area) - goto free_cpu_data; + return ret; ret = sev_cpu_init(sd); if (ret) goto free_save_area; - per_cpu(svm_data, cpu) = sd; - + sd->save_area_pa = __sme_page_pa(sd->save_area); return 0; free_save_area: __free_page(sd->save_area); -free_cpu_data: - kfree(sd); + sd->save_area = NULL; return ret; } @@ -730,6 +714,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) u32 offset; u32 *msrpm; + /* + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: to_svm(vcpu)->msrpm; @@ -1425,7 +1418,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb) int i; for_each_online_cpu(i) - cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); + cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL); } static void svm_vcpu_free(struct kvm_vcpu *vcpu) @@ -1439,6 +1432,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) */ svm_clear_current_vmcb(svm->vmcb); + svm_leave_nested(vcpu); svm_free_nested(svm); sev_free_vcpu(vcpu); @@ -1450,7 +1444,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); if (sev_es_guest(vcpu->kvm)) sev_es_unmap_ghcb(svm); @@ -1462,7 +1456,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * Save additional host state that will be restored on VMEXIT (sev-es) * or subsequent vmload of host save area. */ - vmsave(__sme_page_pa(sd->save_area)); + vmsave(sd->save_area_pa); if (sev_es_guest(vcpu->kvm)) { struct sev_es_save_area *hostsa; hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); @@ -1487,7 +1481,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu) static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); if (sd->current_vmcb != svm->vmcb) { sd->current_vmcb = svm->vmcb; @@ -2710,9 +2704,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) msr->data = 0; switch (msr->index) { - case MSR_F10H_DECFG: - if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) - msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + case MSR_AMD64_DE_CFG: + if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; case MSR_IA32_PERF_CAPABILITIES: return 0; @@ -2813,7 +2807,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; - case MSR_F10H_DECFG: + case MSR_AMD64_DE_CFG: msr_info->data = svm->msr_decfg; break; default: @@ -3042,7 +3036,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_F10H_DECFG: { + case MSR_AMD64_DE_CFG: { struct kvm_msr_entry msr_entry; msr_entry.index = msr->index; @@ -3426,15 +3420,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } - if (is_external_interrupt(svm->vmcb->control.exit_int_info) && - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && - exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && - exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " - "exit_code 0x%x\n", - __func__, svm->vmcb->control.exit_int_info, - exit_code); - if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; @@ -3443,7 +3428,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) static void reload_tss(struct kvm_vcpu *vcpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); sd->tss_desc->type = 9; /* available 32/64-bit TSS */ load_TR_desc(); @@ -3451,7 +3436,7 @@ static void reload_tss(struct kvm_vcpu *vcpu) static void pre_svm_run(struct kvm_vcpu *vcpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); /* @@ -3911,30 +3896,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; } -static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) { struct vcpu_svm *svm = to_svm(vcpu); - unsigned long vmcb_pa = svm->current_vmcb->pa; guest_state_enter_irqoff(); - if (sev_es_guest(vcpu->kvm)) { - __svm_sev_es_vcpu_run(vmcb_pa); - } else { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); - - /* - * Use a single vmcb (vmcb01 because it's always valid) for - * context switching guest state via VMLOAD/VMSAVE, that way - * the state doesn't need to be copied between vmcb01 and - * vmcb02 when switching vmcbs for nested virtualization. - */ - vmload(svm->vmcb01.pa); - __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs); - vmsave(svm->vmcb01.pa); - - vmload(__sme_page_pa(sd->save_area)); - } + if (sev_es_guest(vcpu->kvm)) + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); + else + __svm_vcpu_run(svm, spec_ctrl_intercepted); guest_state_exit_irqoff(); } @@ -3942,6 +3913,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); trace_kvm_entry(vcpu); @@ -3998,34 +3970,15 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) - x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); - - svm_vcpu_enter_exit(vcpu); + x86_spec_ctrl_set_guest(svm->virt_spec_ctrl); - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) && - unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted); if (!sev_es_guest(vcpu->kvm)) reload_tss(vcpu); if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) - x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + x86_spec_ctrl_restore_host(svm->virt_spec_ctrl); if (!sev_es_guest(vcpu->kvm)) { vcpu->arch.cr2 = svm->vmcb->save.cr2; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 6a7686bf6900..199a2ecef1ce 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -209,7 +209,6 @@ struct vcpu_svm { struct vmcb *vmcb; struct kvm_vmcb_info vmcb01; struct kvm_vmcb_info *current_vmcb; - struct svm_cpu_data *svm_data; u32 asid; u32 sysenter_esp_hi; u32 sysenter_eip_hi; @@ -281,8 +280,6 @@ struct vcpu_svm { }; struct svm_cpu_data { - int cpu; - u64 asid_generation; u32 max_asid; u32 next_asid; @@ -290,13 +287,15 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + unsigned long save_area_pa; + struct vmcb *current_vmcb; /* index = sev_asid, value = vmcb pointer */ struct vmcb **sev_vmcbs; }; -DECLARE_PER_CPU(struct svm_cpu_data *, svm_data); +DECLARE_PER_CPU(struct svm_cpu_data, svm_data); void recalc_intercepts(struct vcpu_svm *svm); @@ -683,7 +682,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ -void __svm_sev_es_vcpu_run(unsigned long vmcb_pa); -void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); #endif diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 9430d6437c9f..36c8af87a707 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -61,9 +61,4 @@ static __always_inline void vmsave(unsigned long pa) svm_asm1(vmsave, "a" (pa), "memory"); } -static __always_inline void vmload(unsigned long pa) -{ - svm_asm1(vmload, "a" (pa), "memory"); -} - #endif /* __KVM_X86_SVM_OPS_H */ diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 723f8534986c..34367dc203f2 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -4,35 +4,97 @@ #include <asm/bitsperlong.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> +#include "kvm-asm-offsets.h" #define WORD_SIZE (BITS_PER_LONG / 8) /* Intentionally omit RAX as it's context switched by hardware */ -#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE -#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE -#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE +#define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) +#define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) +#define VCPU_RBX (SVM_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE) /* Intentionally omit RSP as it's context switched by hardware */ -#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE -#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE -#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE +#define VCPU_RBP (SVM_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE) +#define VCPU_RSI (SVM_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE) +#define VCPU_RDI (SVM_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE) #ifdef CONFIG_X86_64 -#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE -#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE -#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE -#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE -#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE -#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE -#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE -#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE +#define VCPU_R8 (SVM_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE) +#define VCPU_R9 (SVM_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE) +#define VCPU_R10 (SVM_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE) +#define VCPU_R11 (SVM_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE) +#define VCPU_R12 (SVM_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE) +#define VCPU_R13 (SVM_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE) +#define VCPU_R14 (SVM_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE) +#define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) #endif +#define SVM_vmcb01_pa (SVM_vmcb01 + KVM_VMCB_pa) + .section .noinstr.text, "ax" +.macro RESTORE_GUEST_SPEC_CTRL + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ + ALTERNATIVE_2 "", \ + "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_V_SPEC_CTRL +801: +.endm +.macro RESTORE_GUEST_SPEC_CTRL_BODY +800: + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. This is kept out-of-line so that the common + * case does not have to jump. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. + */ + movl SVM_spec_ctrl(%_ASM_DI), %eax + cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax + je 801b + mov $MSR_IA32_SPEC_CTRL, %ecx + xor %edx, %edx + wrmsr + jmp 801b +.endm + +.macro RESTORE_HOST_SPEC_CTRL + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ + ALTERNATIVE_2 "", \ + "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_V_SPEC_CTRL +901: +.endm +.macro RESTORE_HOST_SPEC_CTRL_BODY +900: + /* Same for after vmexit. */ + mov $MSR_IA32_SPEC_CTRL, %ecx + + /* + * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, + * if it was not intercepted during guest execution. + */ + cmpb $0, (%_ASM_SP) + jnz 998f + rdmsr + movl %eax, SVM_spec_ctrl(%_ASM_DI) +998: + + /* Now restore the host value of the MSR if different from the guest's. */ + movl PER_CPU_VAR(x86_spec_ctrl_current), %eax + cmp SVM_spec_ctrl(%_ASM_DI), %eax + je 901b + xor %edx, %edx + wrmsr + jmp 901b +.endm + + /** * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode - * @vmcb_pa: unsigned long - * @regs: unsigned long * (to guest registers) + * @svm: struct vcpu_svm * + * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP @@ -47,49 +109,71 @@ SYM_FUNC_START(__svm_vcpu_run) #endif push %_ASM_BX - /* Save @regs. */ + /* + * Save variables needed after vmexit on the stack, in inverse + * order compared to when they are needed. + */ + + /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ push %_ASM_ARG2 - /* Save @vmcb. */ + /* Needed to restore access to percpu variables. */ + __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa) + + /* Finally save @svm. */ push %_ASM_ARG1 - /* Move @regs to RAX. */ - mov %_ASM_ARG2, %_ASM_AX +.ifnc _ASM_ARG1, _ASM_DI + /* + * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX + * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + */ + mov %_ASM_ARG1, %_ASM_DI +.endif + + /* Clobbers RAX, RCX, RDX. */ + RESTORE_GUEST_SPEC_CTRL + + /* + * Use a single vmcb (vmcb01 because it's always valid) for + * context switching guest state via VMLOAD/VMSAVE, that way + * the state doesn't need to be copied between vmcb01 and + * vmcb02 when switching vmcbs for nested virtualization. + */ + mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX +1: vmload %_ASM_AX +2: + + /* Get svm->current_vmcb->pa into RAX. */ + mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX + mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX /* Load guest registers. */ - mov VCPU_RCX(%_ASM_AX), %_ASM_CX - mov VCPU_RDX(%_ASM_AX), %_ASM_DX - mov VCPU_RBX(%_ASM_AX), %_ASM_BX - mov VCPU_RBP(%_ASM_AX), %_ASM_BP - mov VCPU_RSI(%_ASM_AX), %_ASM_SI - mov VCPU_RDI(%_ASM_AX), %_ASM_DI + mov VCPU_RCX(%_ASM_DI), %_ASM_CX + mov VCPU_RDX(%_ASM_DI), %_ASM_DX + mov VCPU_RBX(%_ASM_DI), %_ASM_BX + mov VCPU_RBP(%_ASM_DI), %_ASM_BP + mov VCPU_RSI(%_ASM_DI), %_ASM_SI #ifdef CONFIG_X86_64 - mov VCPU_R8 (%_ASM_AX), %r8 - mov VCPU_R9 (%_ASM_AX), %r9 - mov VCPU_R10(%_ASM_AX), %r10 - mov VCPU_R11(%_ASM_AX), %r11 - mov VCPU_R12(%_ASM_AX), %r12 - mov VCPU_R13(%_ASM_AX), %r13 - mov VCPU_R14(%_ASM_AX), %r14 - mov VCPU_R15(%_ASM_AX), %r15 + mov VCPU_R8 (%_ASM_DI), %r8 + mov VCPU_R9 (%_ASM_DI), %r9 + mov VCPU_R10(%_ASM_DI), %r10 + mov VCPU_R11(%_ASM_DI), %r11 + mov VCPU_R12(%_ASM_DI), %r12 + mov VCPU_R13(%_ASM_DI), %r13 + mov VCPU_R14(%_ASM_DI), %r14 + mov VCPU_R15(%_ASM_DI), %r15 #endif - - /* "POP" @vmcb to RAX. */ - pop %_ASM_AX + mov VCPU_RDI(%_ASM_DI), %_ASM_DI /* Enter guest mode */ sti -1: vmrun %_ASM_AX - -2: cli - -#ifdef CONFIG_RETPOLINE - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE -#endif +3: vmrun %_ASM_AX +4: + cli - /* "POP" @regs to RAX. */ + /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX /* Save all guest registers. */ @@ -110,6 +194,26 @@ SYM_FUNC_START(__svm_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* @svm can stay in RDI from now on. */ + mov %_ASM_AX, %_ASM_DI + + mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX +5: vmsave %_ASM_AX +6: + + /* Restores GSBASE among other things, allowing access to percpu data. */ + pop %_ASM_AX +7: vmload %_ASM_AX +8: + +#ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +#endif + + /* Clobbers RAX, RCX, RDX. */ + RESTORE_HOST_SPEC_CTRL + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -145,6 +249,9 @@ SYM_FUNC_START(__svm_vcpu_run) xor %r15d, %r15d #endif + /* "Pop" @spec_ctrl_intercepted. */ + pop %_ASM_BX + pop %_ASM_BX #ifdef CONFIG_X86_64 @@ -159,17 +266,33 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET -3: cmpb $0, kvm_rebooting + RESTORE_GUEST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY + +10: cmpb $0, kvm_rebooting jne 2b ud2 +30: cmpb $0, kvm_rebooting + jne 4b + ud2 +50: cmpb $0, kvm_rebooting + jne 6b + ud2 +70: cmpb $0, kvm_rebooting + jne 8b + ud2 - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE(1b, 10b) + _ASM_EXTABLE(3b, 30b) + _ASM_EXTABLE(5b, 50b) + _ASM_EXTABLE(7b, 70b) SYM_FUNC_END(__svm_vcpu_run) /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode - * @vmcb_pa: unsigned long + * @svm: struct vcpu_svm * + * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_sev_es_vcpu_run) push %_ASM_BP @@ -184,8 +307,31 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) #endif push %_ASM_BX - /* Move @vmcb to RAX. */ - mov %_ASM_ARG1, %_ASM_AX + /* + * Save variables needed after vmexit on the stack, in inverse + * order compared to when they are needed. + */ + + /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ + push %_ASM_ARG2 + + /* Save @svm. */ + push %_ASM_ARG1 + +.ifnc _ASM_ARG1, _ASM_DI + /* + * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX + * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + */ + mov %_ASM_ARG1, %_ASM_DI +.endif + + /* Clobbers RAX, RCX, RDX. */ + RESTORE_GUEST_SPEC_CTRL + + /* Get svm->current_vmcb->pa into RAX. */ + mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX + mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX /* Enter guest mode */ sti @@ -194,11 +340,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) 2: cli + /* Pop @svm to RDI, guest registers have been saved already. */ + pop %_ASM_DI + #ifdef CONFIG_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* Clobbers RAX, RCX, RDX. */ + RESTORE_HOST_SPEC_CTRL + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -208,6 +360,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) */ UNTRAIN_RET + /* "Pop" @spec_ctrl_intercepted. */ + pop %_ASM_BX + pop %_ASM_BX #ifdef CONFIG_X86_64 @@ -222,6 +377,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) pop %_ASM_BP RET + RESTORE_GUEST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY + 3: cmpb $0, kvm_rebooting jne 2b ud2 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c62352dda6a..5b0d4859e4b7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4854,6 +4854,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu) { + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); } @@ -6440,9 +6441,6 @@ out: return kvm_state.size; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. - */ void vmx_leave_nested(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 25b70a85bef5..10b33da9bd05 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -617,7 +617,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu); - for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { + for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; @@ -643,7 +643,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmc *pmc = NULL; int i; - for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { + for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) { pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 8477d8bdd69c..0b5db4de4d09 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <asm/asm.h> -#include <asm/asm-offsets.h> #include <asm/bitsperlong.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> #include <asm/percpu.h> #include <asm/segment.h> +#include "kvm-asm-offsets.h" #include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f5eb577d583..69227f77b201 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -628,6 +628,12 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto ex->payload = payload; } +/* Forcibly leave the nested mode in cases like a vCPU reset */ +static void kvm_leave_nested(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops.nested_ops->leave_nested(vcpu); +} + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool has_payload, unsigned long payload, bool reinject) @@ -1438,32 +1444,27 @@ static const u32 msrs_to_save_all[] = { MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, + MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, + + /* This part of MSRs should match KVM_INTEL_PMC_MAX_GENERIC. */ MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, - MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9, - MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11, - MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, - MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, - MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, - MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9, - MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11, - MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, - MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, - MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, - MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, + + /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */ MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, }; @@ -1562,7 +1563,7 @@ static const u32 msr_based_features_all[] = { MSR_IA32_VMX_EPT_VPID_CAP, MSR_IA32_VMX_VMFUNC, - MSR_F10H_DECFG, + MSR_AMD64_DE_CFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, @@ -5200,7 +5201,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & KVM_VCPUEVENT_VALID_SMM) { if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { - kvm_x86_ops.nested_ops->leave_nested(vcpu); + kvm_leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); } @@ -7041,14 +7042,14 @@ static void kvm_init_msr_list(void) intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) continue; break; - case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: + case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR_MAX: if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >= - min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) + min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) continue; break; - case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17: + case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL_MAX: if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= - min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) + min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) continue; break; case MSR_IA32_XFD: @@ -9810,7 +9811,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) int kvm_check_nested_events(struct kvm_vcpu *vcpu) { - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { kvm_x86_ops.nested_ops->triple_fault(vcpu); return 1; } @@ -10565,10 +10566,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { - if (is_guest_mode(vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (is_guest_mode(vcpu)) kvm_x86_ops.nested_ops->triple_fault(vcpu); - } else { + + if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; vcpu->mmio_needed = 0; r = 0; @@ -12002,8 +12004,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) WARN_ON_ONCE(!init_event && (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu))); + /* + * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's + * possible to INIT the vCPU while L2 is active. Force the vCPU back + * into L1 as EFER.SVME is cleared on INIT (along with all other EFER + * bits), i.e. virtualization is disabled. + */ + if (is_guest_mode(vcpu)) + kvm_leave_nested(vcpu); + kvm_lapic_reset(vcpu, init_event); + WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu)); vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2dae413bd62a..f3098c0e386a 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -954,6 +954,14 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); } +static inline int max_evtchn_port(struct kvm *kvm) +{ + if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) + return EVTCHN_2L_NR_CHANNELS; + else + return COMPAT_EVTCHN_2L_NR_CHANNELS; +} + static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, evtchn_port_t *ports) { @@ -1042,6 +1050,10 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, *r = -EFAULT; goto out; } + if (ports[i] >= max_evtchn_port(vcpu->kvm)) { + *r = -EINVAL; + goto out; + } } if (sched_poll.nr_ports == 1) @@ -1215,6 +1227,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) bool longmode; u64 input, params[6], r = -ENOSYS; bool handled = false; + u8 cpl; input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX); @@ -1242,9 +1255,17 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) params[5] = (u64)kvm_r9_read(vcpu); } #endif + cpl = static_call(kvm_x86_get_cpl)(vcpu); trace_kvm_xen_hypercall(input, params[0], params[1], params[2], params[3], params[4], params[5]); + /* + * Only allow hypercall acceleration for CPL0. The rare hypercalls that + * are permitted in guest userspace can be handled by the VMM. + */ + if (unlikely(cpl > 0)) + goto handle_in_userspace; + switch (input) { case __HYPERVISOR_xen_version: if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) { @@ -1279,10 +1300,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) if (handled) return kvm_xen_hypercall_set_result(vcpu, r); +handle_in_userspace: vcpu->run->exit_reason = KVM_EXIT_XEN; vcpu->run->xen.type = KVM_EXIT_XEN_HCALL; vcpu->run->xen.u.hcall.longmode = longmode; - vcpu->run->xen.u.hcall.cpl = static_call(kvm_x86_get_cpl)(vcpu); + vcpu->run->xen.u.hcall.cpl = cpl; vcpu->run->xen.u.hcall.input = input; vcpu->run->xen.u.hcall.params[0] = params[0]; vcpu->run->xen.u.hcall.params[1] = params[1]; @@ -1297,14 +1319,6 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) return 0; } -static inline int max_evtchn_port(struct kvm *kvm) -{ - if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) - return EVTCHN_2L_NR_CHANNELS; - else - return COMPAT_EVTCHN_2L_NR_CHANNELS; -} - static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) { int poll_evtchn = vcpu->arch.xen.poll_evtchn; diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index f1bb18617156..24b48af27417 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -6,6 +6,7 @@ #include <linux/uaccess.h> #include <linux/export.h> +#include <linux/instrumented.h> #include <asm/tlbflush.h> @@ -44,7 +45,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) * called from other contexts. */ pagefault_disable(); + instrument_copy_from_user_before(to, from, n); ret = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, ret); pagefault_enable(); return ret; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 6b3033845c6d..5804bbae4f01 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -37,8 +37,12 @@ int pmd_huge(pmd_t pmd) */ int pud_huge(pud_t pud) { +#if CONFIG_PGTABLE_LEVELS > 2 return !pud_none(pud) && (pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT; +#else + return 0; +#endif } #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 78c5bc654cff..6453fbaedb08 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -217,9 +217,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; + /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. + */ + phys_addr &= PHYSICAL_PAGE_MASK; + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 00127abd89ee..99620428ad78 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,7 +11,6 @@ #include <linux/bpf.h> #include <linux/memory.h> #include <linux/sort.h> -#include <linux/init.h> #include <asm/extable.h> #include <asm/set_memory.h> #include <asm/nospec-branch.h> @@ -389,18 +388,6 @@ out: return ret; } -int __init bpf_arch_init_dispatcher_early(void *ip) -{ - const u8 *nop_insn = x86_nops[5]; - - if (is_endbr(*(u32 *)ip)) - ip += ENDBR_INSN_SIZE; - - if (memcmp(ip, nop_insn, X86_PATCH_SIZE)) - text_poke_early(ip, nop_insn, X86_PATCH_SIZE); - return 0; -} - int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index bb176c72891c..93ae33248f42 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -513,15 +513,23 @@ static int pm_cpu_check(const struct x86_cpu_id *c) static void pm_save_spec_msr(void) { - u32 spec_msr_id[] = { - MSR_IA32_SPEC_CTRL, - MSR_IA32_TSX_CTRL, - MSR_TSX_FORCE_ABORT, - MSR_IA32_MCU_OPT_CTRL, - MSR_AMD64_LS_CFG, + struct msr_enumeration { + u32 msr_no; + u32 feature; + } msr_enum[] = { + { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL }, + { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL }, + { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT }, + { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL }, + { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD }, + { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC }, }; + int i; - msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); + for (i = 0; i < ARRAY_SIZE(msr_enum); i++) { + if (boot_cpu_has(msr_enum[i].feature)) + msr_build_context(&msr_enum[i].msr_no, 1); + } } static int pm_check_save_msr(void) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f82857e48815..038da45f057a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -23,6 +23,7 @@ #include <linux/start_kernel.h> #include <linux/sched.h> #include <linux/kprobes.h> +#include <linux/kstrtox.h> #include <linux/memblock.h> #include <linux/export.h> #include <linux/mm.h> @@ -113,7 +114,7 @@ static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); static int __init parse_xen_msr_safe(char *str) { if (str) - return strtobool(str, &xen_msr_safe); + return kstrtobool(str, &xen_msr_safe); return -EINVAL; } early_param("xen_msr_safe", parse_xen_msr_safe); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 4f4309500559..8db26f10fb1d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/sched.h> +#include <linux/kstrtox.h> #include <linux/mm.h> #include <linux/pm.h> #include <linux/memblock.h> @@ -85,7 +86,7 @@ static void __init xen_parse_512gb(void) arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit="); if (!arg) val = true; - else if (strtobool(arg + strlen("xen_512gb_limit="), &val)) + else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val)) return; xen_512gb_limit = val; |