diff options
Diffstat (limited to 'arch')
688 files changed, 8088 insertions, 5700 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 205fd23e0cad..d6a68656950e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -34,6 +34,29 @@ config ARCH_HAS_SUBPAGE_FAULTS config HOTPLUG_SMT bool +# Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL +config HOTPLUG_CORE_SYNC + bool + +# Basic CPU dead synchronization selected by architecture +config HOTPLUG_CORE_SYNC_DEAD + bool + select HOTPLUG_CORE_SYNC + +# Full CPU synchronization with alive state selected by architecture +config HOTPLUG_CORE_SYNC_FULL + bool + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU + select HOTPLUG_CORE_SYNC + +config HOTPLUG_SPLIT_STARTUP + bool + select HOTPLUG_CORE_SYNC_FULL + +config HOTPLUG_PARALLEL + bool + select HOTPLUG_SPLIT_STARTUP + config GENERIC_ENTRY bool @@ -285,6 +308,9 @@ config ARCH_HAS_DMA_SET_UNCACHED config ARCH_HAS_DMA_CLEAR_UNCACHED bool +config ARCH_HAS_CPU_FINALIZE_INIT + bool + # Select if arch init_task must go in the __init_task_data section config ARCH_TASK_STRUCT_ON_STACK bool @@ -1188,13 +1214,6 @@ config COMPAT_32BIT_TIME config ARCH_NO_PREEMPT bool -config ARCH_EPHEMERAL_INODES - def_bool n - help - An arch should select this symbol if it doesn't keep track of inode - instances on its own, but instead relies on something else (e.g. the - host kernel for an UML kernel). - config ARCH_SUPPORTS_RT bool diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index f2861a43a61e..cbd9244571af 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -200,25 +200,6 @@ ATOMIC_OPS(xor, xor) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define arch_atomic64_cmpxchg(v, old, new) \ - (arch_cmpxchg(&((v)->counter), old, new)) -#define arch_atomic64_xchg(v, new) \ - (arch_xchg(&((v)->counter), new)) - -#define arch_atomic_cmpxchg(v, old, new) \ - (arch_cmpxchg(&((v)->counter), old, new)) -#define arch_atomic_xchg(v, new) \ - (arch_xchg(&((v)->counter), new)) - -/** - * arch_atomic_fetch_add_unless - add unless the number is a given value - * @v: pointer of type atomic_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns the old value of @v. - */ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) { int c, new, old; @@ -242,15 +223,6 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless -/** - * arch_atomic64_fetch_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns the old value of @v. - */ static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { s64 c, new, old; @@ -274,13 +246,6 @@ static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u } #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless -/* - * arch_atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. - */ static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { s64 old, tmp; diff --git a/arch/alpha/include/asm/bugs.h b/arch/alpha/include/asm/bugs.h deleted file mode 100644 index 78030d1c7e7e..000000000000 --- a/arch/alpha/include/asm/bugs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * include/asm-alpha/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -/* - * I don't know of any alpha bugs yet.. Nice chip - */ - -static void check_bugs(void) -{ -} diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 2a9a877a0508..d98701ee36c6 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1014,8 +1014,6 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, return do_sys_settimeofday64(tv ? &kts : NULL, tz ? &ktz : NULL); } -asmlinkage long sys_ni_posix_timers(void); - SYSCALL_DEFINE2(osf_utimes, const char __user *, filename, struct timeval32 __user *, tvs) { diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 33bf3a627002..b650ff1cb022 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -658,7 +658,7 @@ setup_arch(char **cmdline_p) #endif /* Default root filesystem to sda2. */ - ROOT_DEV = Root_SDA2; + ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2); #ifdef CONFIG_EISA /* FIXME: only set this when we actually have EISA in this box? */ diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h index 2c830347bfb4..89d12a60f84c 100644 --- a/arch/arc/include/asm/atomic-spinlock.h +++ b/arch/arc/include/asm/atomic-spinlock.h @@ -81,6 +81,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ @@ -92,7 +97,11 @@ ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) #define arch_atomic_andnot arch_atomic_andnot + +#define arch_atomic_fetch_and arch_atomic_fetch_and #define arch_atomic_fetch_andnot arch_atomic_fetch_andnot +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 52ee51e1ff7c..592d7fffc223 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -22,30 +22,6 @@ #include <asm/atomic-spinlock.h> #endif -#define arch_atomic_cmpxchg(v, o, n) \ -({ \ - arch_cmpxchg(&((v)->counter), (o), (n)); \ -}) - -#ifdef arch_cmpxchg_relaxed -#define arch_atomic_cmpxchg_relaxed(v, o, n) \ -({ \ - arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \ -}) -#endif - -#define arch_atomic_xchg(v, n) \ -({ \ - arch_xchg(&((v)->counter), (n)); \ -}) - -#ifdef arch_xchg_relaxed -#define arch_atomic_xchg_relaxed(v, n) \ -({ \ - arch_xchg_relaxed(&((v)->counter), (n)); \ -}) -#endif - /* * 64-bit atomics */ diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index c5a8010fdc97..6b6db981967a 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -159,6 +159,7 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) return prev; } +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) { @@ -179,14 +180,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) return prev; } - -/** - * arch_atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. - */ +#define arch_atomic64_xchg arch_atomic64_xchg static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { @@ -212,15 +206,6 @@ static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive -/** - * arch_atomic64_fetch_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if it was not @u. - * Returns the old value of @v - */ static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { s64 old, temp; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0fb4b218f665..cef741bb03b3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_32BIT_OFF_T select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CPU_FINALIZE_INIT if MMU select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE @@ -124,6 +125,7 @@ config ARM select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 select HAVE_VIRT_CPU_ACCOUNTING_GEN + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select IRQ_FORCED_THREADING select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE @@ -1780,7 +1782,7 @@ config VFP Say Y to include VFP support code in the kernel. This is needed if your hardware includes a VFP unit. - Please see <file:Documentation/arm/vfp/release-notes.rst> for + Please see <file:Documentation/arch/arm/vfp/release-notes.rst> for release notes and additional status information. Say N if your target does not have VFP hardware. diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index 1feb6b0f7a1f..627752f18661 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -2,6 +2,7 @@ #include <linux/libfdt_env.h> #include <asm/setup.h> #include <libfdt.h> +#include "misc.h" #if defined(CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_EXTEND) #define do_extend_cmdline 1 diff --git a/arch/arm/boot/compressed/fdt_check_mem_start.c b/arch/arm/boot/compressed/fdt_check_mem_start.c index 9291a2661bdf..aa856567fd33 100644 --- a/arch/arm/boot/compressed/fdt_check_mem_start.c +++ b/arch/arm/boot/compressed/fdt_check_mem_start.c @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <linux/libfdt.h> #include <linux/sizes.h> +#include "misc.h" static const void *get_prop(const void *fdt, const char *node_path, const char *property, int minlen) diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c index abfed1aa2baa..6b4baa6a9a50 100644 --- a/arch/arm/boot/compressed/misc.c +++ b/arch/arm/boot/compressed/misc.c @@ -103,9 +103,6 @@ static void putstr(const char *ptr) /* * gzip declarations */ -extern char input_data[]; -extern char input_data_end[]; - unsigned char *output_data; unsigned long free_mem_ptr; @@ -131,9 +128,6 @@ asmlinkage void __div0(void) error("Attempting division by 0!"); } -extern int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)); - - void decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p, unsigned long free_mem_ptr_end_p, diff --git a/arch/arm/boot/compressed/misc.h b/arch/arm/boot/compressed/misc.h index c958dccd1d97..6da00a26ac08 100644 --- a/arch/arm/boot/compressed/misc.h +++ b/arch/arm/boot/compressed/misc.h @@ -6,5 +6,16 @@ void error(char *x) __noreturn; extern unsigned long free_mem_ptr; extern unsigned long free_mem_end_ptr; +void __div0(void); +void +decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p, + unsigned long free_mem_ptr_end_p, int arch_id); +void fortify_panic(const char *name); +int atags_to_fdt(void *atag_list, void *fdt, int total_space); +uint32_t fdt_check_mem_start(uint32_t mem_start, const void *fdt); +int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)); + +extern char input_data[]; +extern char input_data_end[]; #endif diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts index 2fc9a5d5e0c0..625b9b311b49 100644 --- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts +++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts @@ -527,7 +527,7 @@ interrupt-parent = <&gpio1>; interrupts = <31 0>; - pendown-gpio = <&gpio1 31 0>; + pendown-gpio = <&gpio1 31 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts index aa5cc0e98bba..217e9b96c61e 100644 --- a/arch/arm/boot/dts/at91-sama7g5ek.dts +++ b/arch/arm/boot/dts/at91-sama7g5ek.dts @@ -792,7 +792,7 @@ }; &shdwc { - atmel,shdwc-debouncer = <976>; + debounce-delay-us = <976>; status = "okay"; input@0 { diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts index 88869ca874d1..045cb253f23a 100644 --- a/arch/arm/boot/dts/at91sam9261ek.dts +++ b/arch/arm/boot/dts/at91sam9261ek.dts @@ -156,7 +156,7 @@ compatible = "ti,ads7843"; interrupts-extended = <&pioC 2 IRQ_TYPE_EDGE_BOTH>; spi-max-frequency = <3000000>; - pendown-gpio = <&pioC 2 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&pioC 2 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <150>; ti,x-max = /bits/ 16 <3830>; diff --git a/arch/arm/boot/dts/imx6qdl-mba6.dtsi b/arch/arm/boot/dts/imx6qdl-mba6.dtsi index 78555a618851..7b7e6c2ad190 100644 --- a/arch/arm/boot/dts/imx6qdl-mba6.dtsi +++ b/arch/arm/boot/dts/imx6qdl-mba6.dtsi @@ -209,6 +209,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pcie>; reset-gpio = <&gpio6 7 GPIO_ACTIVE_LOW>; + vpcie-supply = <®_pcie>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi b/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi index 5882c7565f64..32a6022625d9 100644 --- a/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi +++ b/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi @@ -8,6 +8,7 @@ #include <dt-bindings/input/input.h> #include <dt-bindings/leds/common.h> #include <dt-bindings/pwm/pwm.h> +#include <dt-bindings/regulator/dlg,da9063-regulator.h> #include "imx6ull.dtsi" / { @@ -84,16 +85,20 @@ regulators { vdd_soc_in_1v4: buck1 { + regulator-allowed-modes = <DA9063_BUCK_MODE_SLEEP>; /* PFM */ regulator-always-on; regulator-boot-on; + regulator-initial-mode = <DA9063_BUCK_MODE_SLEEP>; regulator-max-microvolt = <1400000>; regulator-min-microvolt = <1400000>; regulator-name = "vdd_soc_in_1v4"; }; vcc_3v3: buck2 { + regulator-allowed-modes = <DA9063_BUCK_MODE_SYNC>; /* PWM */ regulator-always-on; regulator-boot-on; + regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>; regulator-max-microvolt = <3300000>; regulator-min-microvolt = <3300000>; regulator-name = "vcc_3v3"; @@ -106,8 +111,10 @@ * the voltage is set to 1.5V. */ vcc_ddr_1v35: buck3 { + regulator-allowed-modes = <DA9063_BUCK_MODE_SYNC>; /* PWM */ regulator-always-on; regulator-boot-on; + regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>; regulator-max-microvolt = <1500000>; regulator-min-microvolt = <1500000>; regulator-name = "vcc_ddr_1v35"; diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts index d917dc4f2f22..6ad39dca7009 100644 --- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts +++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts @@ -64,7 +64,7 @@ interrupt-parent = <&gpio2>; interrupts = <7 0>; spi-max-frequency = <1000000>; - pendown-gpio = <&gpio2 7 0>; + pendown-gpio = <&gpio2 7 GPIO_ACTIVE_LOW>; vcc-supply = <®_3p3v>; ti,x-min = /bits/ 16 <0>; ti,x-max = /bits/ 16 <4095>; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index f483bc0afe5e..234e5fc647b2 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -205,7 +205,7 @@ pinctrl-0 = <&pinctrl_tsc2046_pendown>; interrupt-parent = <&gpio2>; interrupts = <29 0>; - pendown-gpio = <&gpio2 29 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio2 29 GPIO_ACTIVE_LOW>; touchscreen-max-pressure = <255>; wakeup-source; }; diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi index e61b8a2bfb7d..51baedf1603b 100644 --- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi +++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi @@ -227,7 +227,7 @@ interrupt-parent = <&gpio2>; interrupts = <25 0>; /* gpio_57 */ - pendown-gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio2 25 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; ti,x-max = /bits/ 16 <0x0fff>; diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi index 3decc2d78a6c..a7f99ae0c1fe 100644 --- a/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi +++ b/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi @@ -54,7 +54,7 @@ interrupt-parent = <&gpio1>; interrupts = <27 0>; /* gpio_27 */ - pendown-gpio = <&gpio1 27 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio1 27 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; ti,x-max = /bits/ 16 <0x0fff>; diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi index c595afe4181d..d310b5c7bac3 100644 --- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi +++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi @@ -311,7 +311,7 @@ interrupt-parent = <&gpio1>; interrupts = <8 0>; /* boot6 / gpio_8 */ spi-max-frequency = <1000000>; - pendown-gpio = <&gpio1 8 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio1 8 GPIO_ACTIVE_LOW>; vcc-supply = <®_vcc3>; pinctrl-names = "default"; pinctrl-0 = <&tsc2048_pins>; diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi index 1d6e88f99eb3..c3570acc35fa 100644 --- a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi +++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi @@ -149,7 +149,7 @@ interrupt-parent = <&gpio4>; interrupts = <18 0>; /* gpio_114 */ - pendown-gpio = <&gpio4 18 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio4 18 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; ti,x-max = /bits/ 16 <0x0fff>; diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi index 7e30f9d45790..d95a0e130058 100644 --- a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi +++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi @@ -160,7 +160,7 @@ interrupt-parent = <&gpio4>; interrupts = <18 0>; /* gpio_114 */ - pendown-gpio = <&gpio4 18 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio4 18 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; ti,x-max = /bits/ 16 <0x0fff>; diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi index 559853764487..4c3b6bab179c 100644 --- a/arch/arm/boot/dts/omap3-pandora-common.dtsi +++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi @@ -651,7 +651,7 @@ pinctrl-0 = <&penirq_pins>; interrupt-parent = <&gpio3>; interrupts = <30 IRQ_TYPE_NONE>; /* GPIO_94 */ - pendown-gpio = <&gpio3 30 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio3 30 GPIO_ACTIVE_LOW>; vcc-supply = <&vaux4>; ti,x-min = /bits/ 16 <0>; diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts index 2d87b9fc230e..af288d63a26a 100644 --- a/arch/arm/boot/dts/omap5-cm-t54.dts +++ b/arch/arm/boot/dts/omap5-cm-t54.dts @@ -354,7 +354,7 @@ interrupt-parent = <&gpio1>; interrupts = <15 0>; /* gpio1_wk15 */ - pendown-gpio = <&gpio1 15 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio1 15 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; diff --git a/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts b/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts index 7a80e1c9f126..aa0e0e8d2a97 100644 --- a/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts +++ b/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts @@ -268,7 +268,6 @@ function = "gpio"; drive-strength = <8>; bias-disable; - input-enable; }; wlan_hostwake_default_state: wlan-hostwake-default-state { @@ -276,7 +275,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; wlan_regulator_default_state: wlan-regulator-default-state { diff --git a/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts b/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts index d64096028ab1..5593a3a60d6c 100644 --- a/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts +++ b/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts @@ -352,7 +352,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; wlan_regulator_default_state: wlan-regulator-default-state { diff --git a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts index b82381229adf..b887e5361ec3 100644 --- a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts +++ b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts @@ -307,7 +307,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; touch_pins: touch-state { @@ -317,7 +316,6 @@ drive-strength = <8>; bias-pull-down; - input-enable; }; reset-pins { @@ -335,7 +333,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; wlan_regulator_default_state: wlan-regulator-default-state { diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index 672b246afbba..d2289205ff81 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -83,6 +83,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; idle-states { diff --git a/arch/arm/boot/dts/qcom-apq8084.dtsi b/arch/arm/boot/dts/qcom-apq8084.dtsi index b653ea40c441..83839e1ec4d1 100644 --- a/arch/arm/boot/dts/qcom-apq8084.dtsi +++ b/arch/arm/boot/dts/qcom-apq8084.dtsi @@ -74,6 +74,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; qcom,saw = <&saw_l2>; }; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index dfcfb3339c23..f0ef86fadc9d 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -102,6 +102,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; qcom,saw = <&saw_l2>; }; }; diff --git a/arch/arm/boot/dts/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom-ipq8064.dtsi index af6764770fd1..7581845737a8 100644 --- a/arch/arm/boot/dts/qcom-ipq8064.dtsi +++ b/arch/arm/boot/dts/qcom-ipq8064.dtsi @@ -45,6 +45,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts index a8304769b509..b269fdca1460 100644 --- a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts +++ b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts @@ -49,7 +49,6 @@ gpioext1-pins { pins = "gpio2"; function = "gpio"; - input-enable; bias-disable; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8660.dtsi b/arch/arm/boot/dts/qcom-msm8660.dtsi index f601b40ebcf4..78023ed2fdf7 100644 --- a/arch/arm/boot/dts/qcom-msm8660.dtsi +++ b/arch/arm/boot/dts/qcom-msm8660.dtsi @@ -36,6 +36,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi index 2a668cd535cc..616fef2ea682 100644 --- a/arch/arm/boot/dts/qcom-msm8960.dtsi +++ b/arch/arm/boot/dts/qcom-msm8960.dtsi @@ -42,6 +42,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts b/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts index ab35f2d644c0..861695cecf84 100644 --- a/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts +++ b/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts @@ -592,7 +592,6 @@ pins = "gpio73"; function = "gpio"; bias-disable; - input-enable; }; touch_pin: touch-state { @@ -602,7 +601,6 @@ drive-strength = <2>; bias-disable; - input-enable; }; reset-pins { diff --git a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi index d3bec03b126c..68a2f9094e53 100644 --- a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi @@ -433,7 +433,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; sdc1_on: sdc1-on-state { diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index 8208012684d4..7ed0d925a4e9 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -80,6 +80,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; qcom,saw = <&saw_l2>; }; diff --git a/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts b/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts index 8d2a054d8fee..8230d0e1d95d 100644 --- a/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts +++ b/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts @@ -461,7 +461,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; reset-pins { diff --git a/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts b/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts index b9698ffb66ca..eb505d6d7f31 100644 --- a/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts +++ b/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts @@ -704,7 +704,6 @@ pins = "gpio75"; function = "gpio"; drive-strength = <16>; - input-enable; }; devwake-pins { @@ -760,14 +759,12 @@ i2c_touchkey_pins: i2c-touchkey-state { pins = "gpio95", "gpio96"; function = "gpio"; - input-enable; bias-pull-up; }; i2c_led_gpioex_pins: i2c-led-gpioex-state { pins = "gpio120", "gpio121"; function = "gpio"; - input-enable; bias-pull-down; }; @@ -781,7 +778,6 @@ wifi_pin: wifi-state { pins = "gpio92"; function = "gpio"; - input-enable; bias-pull-down; }; diff --git a/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts b/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts index 04bc58d87abf..0f650ed31005 100644 --- a/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts +++ b/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts @@ -631,7 +631,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; bt_host_wake_pin: bt-host-wake-state { diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi index c9e05e3540d6..00bf53f99c29 100644 --- a/arch/arm/boot/dts/stm32f429.dtsi +++ b/arch/arm/boot/dts/stm32f429.dtsi @@ -387,6 +387,7 @@ interrupt-names = "tx", "rx0", "rx1", "sce"; resets = <&rcc STM32F4_APB1_RESET(CAN2)>; clocks = <&rcc 0 STM32F4_APB1_CLOCK(CAN2)>; + st,can-secondary; st,gcan = <&gcan>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/stm32f7-pinctrl.dtsi b/arch/arm/boot/dts/stm32f7-pinctrl.dtsi index c8e6c52fb248..9f65403295ca 100644 --- a/arch/arm/boot/dts/stm32f7-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32f7-pinctrl.dtsi @@ -283,6 +283,88 @@ slew-rate = <2>; }; }; + + can1_pins_a: can1-0 { + pins1 { + pinmux = <STM32_PINMUX('A', 12, AF9)>; /* CAN1_TX */ + }; + pins2 { + pinmux = <STM32_PINMUX('A', 11, AF9)>; /* CAN1_RX */ + bias-pull-up; + }; + }; + + can1_pins_b: can1-1 { + pins1 { + pinmux = <STM32_PINMUX('B', 9, AF9)>; /* CAN1_TX */ + }; + pins2 { + pinmux = <STM32_PINMUX('B', 8, AF9)>; /* CAN1_RX */ + bias-pull-up; + }; + }; + + can1_pins_c: can1-2 { + pins1 { + pinmux = <STM32_PINMUX('D', 1, AF9)>; /* CAN1_TX */ + }; + pins2 { + pinmux = <STM32_PINMUX('D', 0, AF9)>; /* CAN1_RX */ + bias-pull-up; + + }; + }; + + can1_pins_d: can1-3 { + pins1 { + pinmux = <STM32_PINMUX('H', 13, AF9)>; /* CAN1_TX */ + }; + pins2 { + pinmux = <STM32_PINMUX('H', 14, AF9)>; /* CAN1_RX */ + bias-pull-up; + + }; + }; + + can2_pins_a: can2-0 { + pins1 { + pinmux = <STM32_PINMUX('B', 6, AF9)>; /* CAN2_TX */ + }; + pins2 { + pinmux = <STM32_PINMUX('B', 5, AF9)>; /* CAN2_RX */ + bias-pull-up; + }; + }; + + can2_pins_b: can2-1 { + pins1 { + pinmux = <STM32_PINMUX('B', 13, AF9)>; /* CAN2_TX */ + }; + pins2 { + pinmux = <STM32_PINMUX('B', 12, AF9)>; /* CAN2_RX */ + bias-pull-up; + }; + }; + + can3_pins_a: can3-0 { + pins1 { + pinmux = <STM32_PINMUX('A', 15, AF11)>; /* CAN3_TX */ + }; + pins2 { + pinmux = <STM32_PINMUX('A', 8, AF11)>; /* CAN3_RX */ + bias-pull-up; + }; + }; + + can3_pins_b: can3-1 { + pins1 { + pinmux = <STM32_PINMUX('B', 4, AF11)>; /* CAN3_TX */ + }; + pins2 { + pinmux = <STM32_PINMUX('B', 3, AF11)>; /* CAN3_RX */ + bias-pull-up; + }; + }; }; }; }; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts index 3b88209bacea..ff1f9a1bcfcf 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts @@ -132,6 +132,7 @@ reg = <0x2c0f0000 0x1000>; interrupts = <0 84 4>; cache-level = <2>; + cache-unified; }; pmu { diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 8a9aeeb504dd..e013ff1168d3 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -21,7 +21,7 @@ /* * The public API for this code is documented in arch/arm/include/asm/mcpm.h. * For a comprehensive description of the main algorithm used here, please - * see Documentation/arm/cluster-pm-race-avoidance.rst. + * see Documentation/arch/arm/cluster-pm-race-avoidance.rst. */ struct sync_struct mcpm_sync; diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 299495c43dfd..f590e803ca11 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -5,7 +5,7 @@ * Created by: Nicolas Pitre, March 2012 * Copyright: (C) 2012-2013 Linaro Limited * - * Refer to Documentation/arm/cluster-pm-race-avoidance.rst + * Refer to Documentation/arch/arm/cluster-pm-race-avoidance.rst * for details of the synchronisation algorithms used here. */ diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S index 1fa09c4697ed..c5eaed5a76f0 100644 --- a/arch/arm/common/vlock.S +++ b/arch/arm/common/vlock.S @@ -6,7 +6,7 @@ * Copyright: (C) 2012-2013 Linaro Limited * * This algorithm is described in more detail in - * Documentation/arm/vlocks.rst. + * Documentation/arch/arm/vlocks.rst. */ #include <linux/linkage.h> diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 78d3d4b82c6c..f3cd04ff022d 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -92,7 +92,7 @@ #define RETURN_READ_PMEVCNTRN(n) \ return read_sysreg(PMEVCNTR##n) -static unsigned long read_pmevcntrn(int n) +static inline unsigned long read_pmevcntrn(int n) { PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); return 0; @@ -100,14 +100,14 @@ static unsigned long read_pmevcntrn(int n) #define WRITE_PMEVCNTRN(n) \ write_sysreg(val, PMEVCNTR##n) -static void write_pmevcntrn(int n, unsigned long val) +static inline void write_pmevcntrn(int n, unsigned long val) { PMEVN_SWITCH(n, WRITE_PMEVCNTRN); } #define WRITE_PMEVTYPERN(n) \ write_sysreg(val, PMEVTYPER##n) -static void write_pmevtypern(int n, unsigned long val) +static inline void write_pmevtypern(int n, unsigned long val) { PMEVN_SWITCH(n, WRITE_PMEVTYPERN); } @@ -222,6 +222,11 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) return false; } +static inline bool kvm_set_pmuserenr(u64 val) +{ + return false; +} + /* PMU Version in DFR Register */ #define ARMV8_PMU_DFR_VER_NI 0 #define ARMV8_PMU_DFR_VER_V3P4 0x5 diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 505a306e0271..aebe2c8f6a68 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -394,6 +394,23 @@ ALT_UP_B(.L0_\@) #endif .endm +/* + * Raw SMP data memory barrier + */ + .macro __smp_dmb mode +#if __LINUX_ARM_ARCH__ >= 7 + .ifeqs "\mode","arm" + dmb ish + .else + W(dmb) ish + .endif +#elif __LINUX_ARM_ARCH__ == 6 + mcr p15, 0, r0, c7, c10, 5 @ dmb +#else + .error "Incompatible SMP platform" +#endif + .endm + #if defined(CONFIG_CPU_V7M) /* * setmode is used to assert to be in svc mode during boot. For v7-M diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index db8512d9a918..f0e3b01afa74 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -197,6 +197,16 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ return val; \ } +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -210,8 +220,7 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) return ret; } - -#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot +#define arch_atomic_cmpxchg arch_atomic_cmpxchg #endif /* __LINUX_ARM_ARCH__ */ @@ -240,8 +249,6 @@ ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #ifndef CONFIG_GENERIC_ATOMIC64 typedef struct { s64 counter; diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h index 97a312ba0840..fe385551edec 100644 --- a/arch/arm/include/asm/bugs.h +++ b/arch/arm/include/asm/bugs.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * arch/arm/include/asm/bugs.h - * * Copyright (C) 1995-2003 Russell King */ #ifndef __ASM_BUGS_H @@ -10,10 +8,8 @@ extern void check_writebuffer_bugs(void); #ifdef CONFIG_MMU -extern void check_bugs(void); extern void check_other_bugs(void); #else -#define check_bugs() do { } while (0) #define check_other_bugs() do { } while (0) #endif diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 7e9251ca29fe..5be3ddc96a50 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -75,6 +75,10 @@ static inline bool arch_syscall_match_sym_name(const char *sym, return !strcasecmp(sym, name); } +void prepare_ftrace_return(unsigned long *parent, unsigned long self, + unsigned long frame_pointer, + unsigned long stack_pointer); + #endif /* ifndef __ASSEMBLY__ */ #endif /* _ASM_ARM_FTRACE */ diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 9349e7a82c9c..2b18a258204d 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -56,7 +56,6 @@ struct machine_desc { void (*init_time)(void); void (*init_machine)(void); void (*init_late)(void); - void (*handle_irq)(struct pt_regs *); void (*restart)(enum reboot_mode, const char *); }; diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 74bb5947b387..28c63d172a96 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -113,6 +113,28 @@ struct cpu_user_fns { unsigned long vaddr, struct vm_area_struct *vma); }; +void fa_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +void fa_clear_user_highpage(struct page *page, unsigned long vaddr); +void feroceon_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr); +void v4_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr); +void v4wb_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr); +void v4wt_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr); +void xsc3_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr); +void xscale_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +void xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr); + #ifdef MULTI_USER extern struct cpu_user_fns cpu_user; diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 483b8ddfcb82..7f44e88d1f25 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -193,5 +193,8 @@ static inline unsigned long it_advance(unsigned long cpsr) return cpsr; } +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_exit(struct pt_regs *regs); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index ba0872a8dcda..546af8b1e3f6 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -5,7 +5,7 @@ * Copyright (C) 1997-1999 Russell King * * Structure passed to kernel to tell it about the - * hardware it's running on. See Documentation/arm/setup.rst + * hardware it's running on. See Documentation/arch/arm/setup.rst * for more info. */ #ifndef __ASMARM_SETUP_H @@ -28,4 +28,11 @@ extern void save_atags(const struct tag *tags); static inline void save_atags(const struct tag *tags) { } #endif +struct machine_desc; +void init_default_cache_policy(unsigned long); +void paging_init(const struct machine_desc *desc); +void early_mm_init(const struct machine_desc *); +void adjust_lowmem_bounds(void); +void setup_dma_zone(const struct machine_desc *desc); + #endif diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h index 430be7774402..8b84092d1518 100644 --- a/arch/arm/include/asm/signal.h +++ b/arch/arm/include/asm/signal.h @@ -22,4 +22,9 @@ typedef struct { #define __ARCH_HAS_SA_RESTORER #include <asm/sigcontext.h> + +void do_rseq_syscall(struct pt_regs *regs); +int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, + int syscall); + #endif diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 7c1c90d9f582..8c05a7f374d8 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -64,7 +64,7 @@ extern void secondary_startup_arm(void); extern int __cpu_disable(void); -extern void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h index 85f9e538fb32..d9c28b3b6b62 100644 --- a/arch/arm/include/asm/spectre.h +++ b/arch/arm/include/asm/spectre.h @@ -35,4 +35,8 @@ static inline void spectre_v2_update_state(unsigned int state, int spectre_bhb_update_vectors(unsigned int method); +void cpu_v7_ca8_ibe(void); +void cpu_v7_ca15_ibe(void); +void cpu_v7_bugs_init(void); + #endif diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h index 506314265c6f..be81b9ca2ea1 100644 --- a/arch/arm/include/asm/suspend.h +++ b/arch/arm/include/asm/suspend.h @@ -13,5 +13,6 @@ extern void cpu_resume(void); extern void cpu_resume_no_hyp(void); extern void cpu_resume_arm(void); extern int cpu_suspend(unsigned long, int (*)(unsigned long)); +extern void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr); #endif diff --git a/arch/arm/include/asm/sync_bitops.h b/arch/arm/include/asm/sync_bitops.h index 6f5d627c44a3..f46b3c570f92 100644 --- a/arch/arm/include/asm/sync_bitops.h +++ b/arch/arm/include/asm/sync_bitops.h @@ -14,14 +14,35 @@ * ops which are SMP safe even on a UP kernel. */ +/* + * Unordered + */ + #define sync_set_bit(nr, p) _set_bit(nr, p) #define sync_clear_bit(nr, p) _clear_bit(nr, p) #define sync_change_bit(nr, p) _change_bit(nr, p) -#define sync_test_and_set_bit(nr, p) _test_and_set_bit(nr, p) -#define sync_test_and_clear_bit(nr, p) _test_and_clear_bit(nr, p) -#define sync_test_and_change_bit(nr, p) _test_and_change_bit(nr, p) #define sync_test_bit(nr, addr) test_bit(nr, addr) -#define arch_sync_cmpxchg arch_cmpxchg +/* + * Fully ordered + */ + +int _sync_test_and_set_bit(int nr, volatile unsigned long * p); +#define sync_test_and_set_bit(nr, p) _sync_test_and_set_bit(nr, p) + +int _sync_test_and_clear_bit(int nr, volatile unsigned long * p); +#define sync_test_and_clear_bit(nr, p) _sync_test_and_clear_bit(nr, p) + +int _sync_test_and_change_bit(int nr, volatile unsigned long * p); +#define sync_test_and_change_bit(nr, p) _sync_test_and_change_bit(nr, p) + +#define arch_sync_cmpxchg(ptr, old, new) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __smp_mb__before_atomic(); \ + __ret = arch_cmpxchg_relaxed((ptr), (old), (new)); \ + __smp_mb__after_atomic(); \ + __ret; \ +}) #endif diff --git a/arch/arm/include/asm/syscalls.h b/arch/arm/include/asm/syscalls.h new file mode 100644 index 000000000000..5912e7cffa6a --- /dev/null +++ b/arch/arm/include/asm/syscalls.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_SYSCALLS_H +#define __ASM_SYSCALLS_H + +#include <linux/linkage.h> +#include <linux/types.h> + +struct pt_regs; +asmlinkage int sys_sigreturn(struct pt_regs *regs); +asmlinkage int sys_rt_sigreturn(struct pt_regs *regs); +asmlinkage long sys_arm_fadvise64_64(int fd, int advice, + loff_t offset, loff_t len); + +struct oldabi_stat64; +asmlinkage long sys_oabi_stat64(const char __user * filename, + struct oldabi_stat64 __user * statbuf); +asmlinkage long sys_oabi_lstat64(const char __user * filename, + struct oldabi_stat64 __user * statbuf); +asmlinkage long sys_oabi_fstat64(unsigned long fd, + struct oldabi_stat64 __user * statbuf); +asmlinkage long sys_oabi_fstatat64(int dfd, + const char __user *filename, + struct oldabi_stat64 __user *statbuf, + int flag); +asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, + unsigned long arg); +struct oabi_epoll_event; +asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, + struct oabi_epoll_event __user *event); +struct oabi_sembuf; +struct old_timespec32; +asmlinkage long sys_oabi_semtimedop(int semid, + struct oabi_sembuf __user *tsops, + unsigned nsops, + const struct old_timespec32 __user *timeout); +asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops, + unsigned nsops); +asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, + void __user *ptr, long fifth); +struct sockaddr; +asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen); +asmlinkage long sys_oabi_connect(int fd, struct sockaddr __user *addr, int addrlen); +asmlinkage long sys_oabi_sendto(int fd, void __user *buff, + size_t len, unsigned flags, + struct sockaddr __user *addr, + int addrlen); +struct user_msghdr; +asmlinkage long sys_oabi_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); +asmlinkage long sys_oabi_socketcall(int call, unsigned long __user *args); + +#endif diff --git a/arch/arm/include/asm/tcm.h b/arch/arm/include/asm/tcm.h index d8bd8a4b0ede..e1f7dca86a22 100644 --- a/arch/arm/include/asm/tcm.h +++ b/arch/arm/include/asm/tcm.h @@ -9,9 +9,7 @@ #ifndef __ASMARM_TCM_H #define __ASMARM_TCM_H -#ifndef CONFIG_HAVE_TCM -#error "You should not be including tcm.h unless you have a TCM!" -#endif +#ifdef CONFIG_HAVE_TCM #include <linux/compiler.h> @@ -29,4 +27,11 @@ void tcm_free(void *addr, size_t len); bool tcm_dtcm_present(void); bool tcm_itcm_present(void); +void __init tcm_init(void); +#else +/* No TCM support, just blank inlines to be optimized out */ +static inline void tcm_init(void) +{ +} +#endif #endif diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index 987fefb0a4db..0aaefe3e1700 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -35,4 +35,13 @@ extern void ptrace_break(struct pt_regs *regs); extern void *vectors_page; +asmlinkage void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl); +asmlinkage void do_undefinstr(struct pt_regs *regs); +asmlinkage void handle_fiq_as_nmi(struct pt_regs *regs); +asmlinkage void bad_mode(struct pt_regs *regs, int reason); +asmlinkage int arm_syscall(int no, struct pt_regs *regs); +asmlinkage void baddataabort(int code, unsigned long instr, struct pt_regs *regs); +asmlinkage void __div0(void); +asmlinkage void handle_bad_stack(struct pt_regs *regs); + #endif diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h index b51f85417f58..d60b09a5acfc 100644 --- a/arch/arm/include/asm/unwind.h +++ b/arch/arm/include/asm/unwind.h @@ -40,6 +40,10 @@ extern void unwind_table_del(struct unwind_table *tab); extern void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl); +void __aeabi_unwind_cpp_pr0(void); +void __aeabi_unwind_cpp_pr1(void); +void __aeabi_unwind_cpp_pr2(void); + #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_ARM_UNWIND diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h index 5b85889f82ee..422c3afa806a 100644 --- a/arch/arm/include/asm/vdso.h +++ b/arch/arm/include/asm/vdso.h @@ -24,6 +24,11 @@ static inline void arm_install_vdso(struct mm_struct *mm, unsigned long addr) #endif /* CONFIG_VDSO */ +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts); +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __vdso_clock_getres(clockid_t clock_id, struct old_timespec32 *res); + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h index 157ea3426158..5b57b8768bac 100644 --- a/arch/arm/include/asm/vfp.h +++ b/arch/arm/include/asm/vfp.h @@ -102,6 +102,7 @@ #ifndef __ASSEMBLY__ void vfp_disable(void); +void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs); #endif #endif /* __ASM_VFP_H */ diff --git a/arch/arm/include/uapi/asm/setup.h b/arch/arm/include/uapi/asm/setup.h index 25ceda63b284..8e50e034fec7 100644 --- a/arch/arm/include/uapi/asm/setup.h +++ b/arch/arm/include/uapi/asm/setup.h @@ -9,7 +9,7 @@ * published by the Free Software Foundation. * * Structure passed to kernel to tell it about the - * hardware it's running on. See Documentation/arm/setup.rst + * hardware it's running on. See Documentation/arch/arm/setup.rst * for more info. */ #ifndef _UAPI__ASMARM_SETUP_H diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index 373b61f9a4f0..33f6eb5213a5 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -127,7 +127,7 @@ static int __init parse_tag_cmdline(const struct tag *tag) #elif defined(CONFIG_CMDLINE_FORCE) pr_warn("Ignoring tag cmdline (using the default kernel command line)\n"); #else - strlcpy(default_command_line, tag->u.cmdline.cmdline, + strscpy(default_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE); #endif return 0; @@ -224,7 +224,7 @@ setup_machine_tags(void *atags_vaddr, unsigned int machine_nr) } /* parse_early_param needs a boot_command_line */ - strlcpy(boot_command_line, from, COMMAND_LINE_SIZE); + strscpy(boot_command_line, from, COMMAND_LINE_SIZE); return mdesc; } diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 14c8dbbb7d2d..087bce6ec8e9 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> +#include <linux/cpu.h> #include <asm/bugs.h> #include <asm/proc-fns.h> @@ -11,7 +12,7 @@ void check_other_bugs(void) #endif } -void __init check_bugs(void) +void __init arch_cpu_finalize_init(void) { check_writebuffer_bugs(); check_other_bugs(); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c39303e5c234..291dc48d6bed 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -875,7 +875,7 @@ ENDPROC(__bad_stack) * existing ones. This mechanism should be used only for things that are * really small and justified, and not be abused freely. * - * See Documentation/arm/kernel_user_helpers.rst for formal definitions. + * See Documentation/arch/arm/kernel_user_helpers.rst for formal definitions. */ THUMB( .arm ) diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index 98ca3e3fa847..d2c8e5313539 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -45,6 +45,7 @@ #include <asm/cacheflush.h> #include <asm/cp15.h> #include <asm/fiq.h> +#include <asm/mach/irq.h> #include <asm/irq.h> #include <asm/traps.h> diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c index 89a52104d32a..225c0699a12c 100644 --- a/arch/arm/kernel/head-inflate-data.c +++ b/arch/arm/kernel/head-inflate-data.c @@ -8,16 +8,13 @@ #include <linux/init.h> #include <linux/zutil.h> +#include "head.h" /* for struct inflate_state */ #include "../../../lib/zlib_inflate/inftrees.h" #include "../../../lib/zlib_inflate/inflate.h" #include "../../../lib/zlib_inflate/infutil.h" -extern char __data_loc[]; -extern char _edata_loc[]; -extern char _sdata[]; - /* * This code is called very early during the boot process to decompress * the .data segment stored compressed in ROM. Therefore none of the global diff --git a/arch/arm/kernel/head.h b/arch/arm/kernel/head.h new file mode 100644 index 000000000000..0eb5accf7141 --- /dev/null +++ b/arch/arm/kernel/head.h @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only + +extern char __data_loc[]; +extern char _edata_loc[]; +extern char _sdata[]; + +int __init __inflate_kernel_data(void); diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index d59c36dc0494..e74d84f58b77 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -169,8 +169,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset = __mem_to_opcode_arm(*(u32 *)loc); offset = (offset & 0x00ffffff) << 2; - if (offset & 0x02000000) - offset -= 0x04000000; + offset = sign_extend32(offset, 25); offset += sym->st_value - loc; @@ -236,7 +235,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_MOVT_PREL: offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); - offset = (offset ^ 0x8000) - 0x8000; + offset = sign_extend32(offset, 15); offset += sym->st_value; if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL || @@ -344,8 +343,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, ((~(j2 ^ sign) & 1) << 22) | ((upper & 0x03ff) << 12) | ((lower & 0x07ff) << 1); - if (offset & 0x01000000) - offset -= 0x02000000; + offset = sign_extend32(offset, 24); offset += sym->st_value - loc; /* @@ -401,7 +399,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset = ((upper & 0x000f) << 12) | ((upper & 0x0400) << 1) | ((lower & 0x7000) >> 4) | (lower & 0x00ff); - offset = (offset ^ 0x8000) - 0x8000; + offset = sign_extend32(offset, 15); offset += sym->st_value; if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL || diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 75cd4699e7b3..c66b560562b3 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -76,13 +76,6 @@ static int __init fpe_setup(char *line) __setup("fpe=", fpe_setup); #endif -extern void init_default_cache_policy(unsigned long); -extern void paging_init(const struct machine_desc *desc); -extern void early_mm_init(const struct machine_desc *); -extern void adjust_lowmem_bounds(void); -extern enum reboot_mode reboot_mode; -extern void setup_dma_zone(const struct machine_desc *desc); - unsigned int processor_id; EXPORT_SYMBOL(processor_id); unsigned int __machine_arch_type __read_mostly; @@ -1142,7 +1135,7 @@ void __init setup_arch(char **cmdline_p) setup_initial_init_mm(_text, _etext, _edata, _end); /* populate cmd_line too for later use, preserving boot_command_line */ - strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); + strscpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; early_fixmap_init(); @@ -1198,10 +1191,6 @@ void __init setup_arch(char **cmdline_p) reserve_crashkernel(); -#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER - handle_arch_irq = mdesc->handle_irq; -#endif - #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index e07f359254c3..8d0afa11bed5 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -18,6 +18,7 @@ #include <asm/traps.h> #include <asm/unistd.h> #include <asm/vfp.h> +#include <asm/syscalls.h> #include "signal.h" diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 87f8d0e5e314..6756203e45f3 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -288,15 +288,11 @@ int __cpu_disable(void) } /* - * called on the thread which is asking for a CPU to be shutdown - - * waits until shutdown has completed, or it is timed out. + * called on the thread which is asking for a CPU to be shutdown after the + * shutdown completed. */ -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU%u: cpu didn't die\n", cpu); - return; - } pr_debug("CPU%u: shutdown\n", cpu); clear_tasks_mm_cpumask(cpu); @@ -336,11 +332,11 @@ void __noreturn arch_cpu_idle_dead(void) flush_cache_louis(); /* - * Tell __cpu_die() that this CPU is now safe to dispose of. Once - * this returns, power and/or clocks can be removed at any point - * from this CPU and its cache by platform_cpu_kill(). + * Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose + * of. Once this returns, power and/or clocks can be removed at + * any point from this CPU and its cache by platform_cpu_kill(). */ - (void)cpu_report_death(); + cpuhp_ap_report_dead(); /* * Ensure that the cache lines associated with that completion are diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index a5f183cfecb1..0141e9bb02e8 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -24,6 +24,7 @@ #include <linux/ipc.h> #include <linux/uaccess.h> #include <linux/slab.h> +#include <asm/syscalls.h> /* * Since loff_t is a 64 bit type we avoid a lot of ABI hassle diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 006163195d67..d00f4040a9f5 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -10,6 +10,8 @@ * Copyright: MontaVista Software, Inc. */ +#include <asm/syscalls.h> + /* * The legacy ABI and the new ARM EABI have different rules making some * syscalls incompatible especially with structure arguments. diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 40c7c807d67f..3bad79db5d6e 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -756,6 +756,7 @@ void __readwrite_bug(const char *fn) } EXPORT_SYMBOL(__readwrite_bug); +#ifdef CONFIG_MMU void __pte_error(const char *file, int line, pte_t pte) { pr_err("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte)); @@ -770,6 +771,7 @@ void __pgd_error(const char *file, int line, pgd_t pgd) { pr_err("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd)); } +#endif asmlinkage void __div0(void) { diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 3408269d19c7..f297d66a8a76 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -135,7 +135,7 @@ static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname) if (lib->dynsym[i].st_name == 0) continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + strscpy(name, lib->dynstr + lib->dynsym[i].st_name, MAX_SYMNAME); c = strchr(name, '@'); if (c) diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 95bd35991288..f069d1b2318e 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -28,7 +28,7 @@ UNWIND( .fnend ) ENDPROC(\name ) .endm - .macro testop, name, instr, store + .macro __testop, name, instr, store, barrier ENTRY( \name ) UNWIND( .fnstart ) ands ip, r1, #3 @@ -38,7 +38,7 @@ UNWIND( .fnstart ) mov r0, r0, lsr #5 add r1, r1, r0, lsl #2 @ Get word offset mov r3, r2, lsl r3 @ create mask - smp_dmb + \barrier #if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) .arch_extension mp ALT_SMP(W(pldw) [r1]) @@ -50,13 +50,21 @@ UNWIND( .fnstart ) strex ip, r2, [r1] cmp ip, #0 bne 1b - smp_dmb + \barrier cmp r0, #0 movne r0, #1 2: bx lr UNWIND( .fnend ) ENDPROC(\name ) .endm + + .macro testop, name, instr, store + __testop \name, \instr, \store, smp_dmb + .endm + + .macro sync_testop, name, instr, store + __testop \name, \instr, \store, __smp_dmb + .endm #else .macro bitop, name, instr ENTRY( \name ) diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S index 4ebecc67e6e0..f13fe9bc2399 100644 --- a/arch/arm/lib/testchangebit.S +++ b/arch/arm/lib/testchangebit.S @@ -10,3 +10,7 @@ .text testop _test_and_change_bit, eor, str + +#if __LINUX_ARM_ARCH__ >= 6 +sync_testop _sync_test_and_change_bit, eor, str +#endif diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S index 009afa0f5b4a..4d2c5ca620eb 100644 --- a/arch/arm/lib/testclearbit.S +++ b/arch/arm/lib/testclearbit.S @@ -10,3 +10,7 @@ .text testop _test_and_clear_bit, bicne, strne + +#if __LINUX_ARM_ARCH__ >= 6 +sync_testop _sync_test_and_clear_bit, bicne, strne +#endif diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S index f3192e55acc8..649dbab65d8d 100644 --- a/arch/arm/lib/testsetbit.S +++ b/arch/arm/lib/testsetbit.S @@ -10,3 +10,7 @@ .text testop _test_and_set_bit, orreq, streq + +#if __LINUX_ARM_ARCH__ >= 6 +sync_testop _sync_test_and_set_bit, orreq, streq +#endif diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 60dc56d8acfb..437dd0352fd4 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -334,16 +334,14 @@ static bool at91_pm_eth_quirk_is_valid(struct at91_pm_quirk_eth *eth) pdev = of_find_device_by_node(eth->np); if (!pdev) return false; + /* put_device(eth->dev) is called at the end of suspend. */ eth->dev = &pdev->dev; } /* No quirks if device isn't a wakeup source. */ - if (!device_may_wakeup(eth->dev)) { - put_device(eth->dev); + if (!device_may_wakeup(eth->dev)) return false; - } - /* put_device(eth->dev) is called at the end of suspend. */ return true; } @@ -439,14 +437,14 @@ clk_unconfigure: pr_err("AT91: PM: failed to enable %s clocks\n", j == AT91_PM_G_ETH ? "geth" : "eth"); } - } else { - /* - * Release the reference to eth->dev taken in - * at91_pm_eth_quirk_is_valid(). - */ - put_device(eth->dev); - eth->dev = NULL; } + + /* + * Release the reference to eth->dev taken in + * at91_pm_eth_quirk_is_valid(). + */ + put_device(eth->dev); + eth->dev = NULL; } return ret; diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h index 29eb075b24a4..b5287ff1c542 100644 --- a/arch/arm/mach-exynos/common.h +++ b/arch/arm/mach-exynos/common.h @@ -106,7 +106,7 @@ void exynos_firmware_init(void); #define C2_STATE (1 << 3) /* * Magic values for bootloader indicating chosen low power mode. - * See also Documentation/arm/samsung/bootloader-interface.rst + * See also Documentation/arch/arm/samsung/bootloader-interface.rst */ #define EXYNOS_SLEEP_MAGIC 0x00000bad #define EXYNOS_AFTR_MAGIC 0xfcba0d10 diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 51e47053c816..3faf9a1e3e36 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -11,7 +11,6 @@ #include <linux/err.h> #include <linux/gpio.h> #include <linux/init.h> -#include <linux/irqchip/mxs.h> #include <linux/reboot.h> #include <linux/micrel_phy.h> #include <linux/of_address.h> @@ -472,7 +471,6 @@ static const char *const mxs_dt_compat[] __initconst = { }; DT_MACHINE_START(MXS, "Freescale MXS (Device Tree)") - .handle_irq = icoll_handle_irq, .init_machine = mxs_machine_init, .init_late = mxs_pm_init, .dt_compat = mxs_dt_compat, diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 9108c871d129..88139200449e 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -877,7 +877,6 @@ MACHINE_START(AMS_DELTA, "Amstrad E3 (Delta)") .map_io = ams_delta_map_io, .init_early = omap1_init_early, .init_irq = omap1_init_irq, - .handle_irq = omap1_handle_irq, .init_machine = ams_delta_init, .init_late = ams_delta_init_late, .init_time = omap1_timer_init, diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c index a501a473ffd6..b56cea9f9d2f 100644 --- a/arch/arm/mach-omap1/board-nokia770.c +++ b/arch/arm/mach-omap1/board-nokia770.c @@ -291,7 +291,6 @@ MACHINE_START(NOKIA770, "Nokia 770") .map_io = omap1_map_io, .init_early = omap1_init_early, .init_irq = omap1_init_irq, - .handle_irq = omap1_handle_irq, .init_machine = omap_nokia770_init, .init_late = omap1_init_late, .init_time = omap1_timer_init, diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index df758c1f9237..46eda4ff4797 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -389,7 +389,6 @@ MACHINE_START(OMAP_OSK, "TI-OSK") .map_io = omap1_map_io, .init_early = omap1_init_early, .init_irq = omap1_init_irq, - .handle_irq = omap1_handle_irq, .init_machine = osk_init, .init_late = omap1_init_late, .init_time = omap1_timer_init, diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c index f79c497f04d5..91df3dc365af 100644 --- a/arch/arm/mach-omap1/board-palmte.c +++ b/arch/arm/mach-omap1/board-palmte.c @@ -259,7 +259,6 @@ MACHINE_START(OMAP_PALMTE, "OMAP310 based Palm Tungsten E") .map_io = omap1_map_io, .init_early = omap1_init_early, .init_irq = omap1_init_irq, - .handle_irq = omap1_handle_irq, .init_machine = omap_palmte_init, .init_late = omap1_init_late, .init_time = omap1_timer_init, diff --git a/arch/arm/mach-omap1/board-sx1.c b/arch/arm/mach-omap1/board-sx1.c index 0c0cdd5e77c7..3ae295af96fd 100644 --- a/arch/arm/mach-omap1/board-sx1.c +++ b/arch/arm/mach-omap1/board-sx1.c @@ -338,7 +338,6 @@ MACHINE_START(SX1, "OMAP310 based Siemens SX1") .map_io = omap1_map_io, .init_early = omap1_init_early, .init_irq = omap1_init_irq, - .handle_irq = omap1_handle_irq, .init_machine = omap_sx1_init, .init_late = omap1_init_late, .init_time = omap1_timer_init, diff --git a/arch/arm/mach-omap1/irq.c b/arch/arm/mach-omap1/irq.c index bfc7ab010ae2..3d9e72e1eddc 100644 --- a/arch/arm/mach-omap1/irq.c +++ b/arch/arm/mach-omap1/irq.c @@ -37,6 +37,7 @@ */ #include <linux/gpio.h> #include <linux/init.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/interrupt.h> @@ -254,4 +255,6 @@ void __init omap1_init_irq(void) ct = irq_data_get_chip_type(d); ct->chip.irq_unmask(d); } + + set_handle_irq(omap1_handle_irq); } diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index 72b08a9bf0fd..6b7197ae3c72 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -233,7 +233,6 @@ MACHINE_START(GUMSTIX, "Gumstix") .map_io = pxa25x_map_io, .nr_irqs = PXA_NR_IRQS, .init_irq = pxa25x_init_irq, - .handle_irq = pxa25x_handle_irq, .init_time = pxa_timer_init, .init_machine = gumstix_init, .restart = pxa_restart, diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index 1b83be181bab..032dc897fe94 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -143,6 +143,7 @@ set_pwer: void __init pxa25x_init_irq(void) { pxa_init_irq(32, pxa25x_set_wake); + set_handle_irq(pxa25x_handle_irq); } static int __init __init diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 4135ba2877c4..c9b56424b653 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -228,6 +228,7 @@ static int pxa27x_set_wake(struct irq_data *d, unsigned int on) void __init pxa27x_init_irq(void) { pxa_init_irq(34, pxa27x_set_wake); + set_handle_irq(pxa27x_handle_irq); } static int __init diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 4325bdc2b9ff..042922a0a9d6 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -1043,7 +1043,6 @@ MACHINE_START(SPITZ, "SHARP Spitz") .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, .init_irq = pxa27x_init_irq, - .handle_irq = pxa27x_handle_irq, .init_machine = spitz_init, .init_time = pxa_timer_init, .restart = spitz_restart, @@ -1056,7 +1055,6 @@ MACHINE_START(BORZOI, "SHARP Borzoi") .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, .init_irq = pxa27x_init_irq, - .handle_irq = pxa27x_handle_irq, .init_machine = spitz_init, .init_time = pxa_timer_init, .restart = spitz_restart, @@ -1069,7 +1067,6 @@ MACHINE_START(AKITA, "SHARP Akita") .map_io = pxa27x_map_io, .nr_irqs = PXA_NR_IRQS, .init_irq = pxa27x_init_irq, - .handle_irq = pxa27x_handle_irq, .init_machine = spitz_init, .init_time = pxa_timer_init, .restart = spitz_restart, diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig index b2d45cf10a3c..b3842c971d31 100644 --- a/arch/arm/mach-sti/Kconfig +++ b/arch/arm/mach-sti/Kconfig @@ -21,7 +21,7 @@ menuconfig ARCH_STI help Include support for STMicroelectronics' STiH415/416, STiH407/10 and STiH418 family SoCs using the Device Tree for discovery. More - information can be found in Documentation/arm/sti/ and + information can be found in Documentation/arch/arm/sti/ and Documentation/devicetree. if ARCH_STI diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index be183ed1232d..c164cde50243 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -712,7 +712,7 @@ config ARM_VIRT_EXT assistance. A compliant bootloader is required in order to make maximum - use of this feature. Refer to Documentation/arm/booting.rst for + use of this feature. Refer to Documentation/arch/arm/booting.rst for details. config SWP_EMULATE @@ -904,7 +904,7 @@ config KUSER_HELPERS the CPU type fitted to the system. This permits binaries to be run on ARMv4 through to ARMv7 without modification. - See Documentation/arm/kernel_user_helpers.rst for details. + See Documentation/arch/arm/kernel_user_helpers.rst for details. However, the fixed address nature of these helpers can be used by ROP (return orientated programming) authors when creating diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index b4a33358d2e9..bc4ed5ce3e00 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -258,12 +258,14 @@ static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; static int dma_mmu_remap_num __initdata; +#ifdef CONFIG_DMA_CMA void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { dma_mmu_remap[dma_mmu_remap_num].base = base; dma_mmu_remap[dma_mmu_remap_num].size = size; dma_mmu_remap_num++; } +#endif void __init dma_contiguous_remap(void) { diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index 54927ba1fa6e..e8f8c1902544 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -37,5 +37,9 @@ static inline int fsr_fs(unsigned int fsr) void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); void early_abt_enable(void); +asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr, + struct pt_regs *regs); +asmlinkage void do_PrefetchAbort(unsigned long addr, unsigned int ifsr, + struct pt_regs *regs); #endif /* __ARCH_ARM_FAULT_H */ diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 7ff9feea13a6..2508be91b7a0 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -354,6 +354,7 @@ EXPORT_SYMBOL(flush_dcache_page); * memcpy() to/from page * if written to page, flush_dcache_page() */ +void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr); void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { unsigned long pfn; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 463fc2a8448f..f3a52c08a200 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -21,6 +21,7 @@ #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> +#include <asm/tcm.h> #include <asm/tlb.h> #include <asm/highmem.h> #include <asm/system_info.h> @@ -37,7 +38,6 @@ #include "fault.h" #include "mm.h" -#include "tcm.h" extern unsigned long __atags_pointer; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 53f2d8774fdb..43cfd06bbeba 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -21,6 +21,7 @@ #include <asm/cputype.h> #include <asm/mpu.h> #include <asm/procinfo.h> +#include <asm/idmap.h> #include "mm.h" diff --git a/arch/arm/mm/tcm.h b/arch/arm/mm/tcm.h deleted file mode 100644 index 6b80a760d875..000000000000 --- a/arch/arm/mm/tcm.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2008-2009 ST-Ericsson AB - * TCM memory handling for ARM systems - * - * Author: Linus Walleij <linus.walleij@stericsson.com> - * Author: Rickard Andersson <rickard.andersson@stericsson.com> - */ - -#ifdef CONFIG_HAVE_TCM -void __init tcm_init(void); -#else -/* No TCM support, just blank inlines to be optimized out */ -static inline void tcm_init(void) -{ -} -#endif diff --git a/arch/arm/probes/kprobes/checkers-common.c b/arch/arm/probes/kprobes/checkers-common.c index 4d720990cf2a..eba7ac4725c0 100644 --- a/arch/arm/probes/kprobes/checkers-common.c +++ b/arch/arm/probes/kprobes/checkers-common.c @@ -40,7 +40,7 @@ enum probes_insn checker_stack_use_imm_0xx(probes_opcode_t insn, * Different from other insn uses imm8, the real addressing offset of * STRD in T32 encoding should be imm8 * 4. See ARMARM description. */ -enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn, +static enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn, struct arch_probes_insn *asi, const struct decode_header *h) { diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 9090c3a74dcc..d8238da095df 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -233,7 +233,7 @@ singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) * kprobe, and that level is reserved for user kprobe handlers, so we can't * risk encountering a new kprobe in an interrupt handler. */ -void __kprobes kprobe_handler(struct pt_regs *regs) +static void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur; struct kprobe_ctlblk *kcb; diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index dbef34ed933f..7f65048380ca 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -145,8 +145,6 @@ __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) } } -extern void kprobe_handler(struct pt_regs *regs); - static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index c562832b8627..171c7076b89f 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -720,7 +720,7 @@ static const char coverage_register_lookup[16] = { [REG_TYPE_NOSPPCX] = COVERAGE_ANY_REG | COVERAGE_SP, }; -unsigned coverage_start_registers(const struct decode_header *h) +static unsigned coverage_start_registers(const struct decode_header *h) { unsigned regs = 0; int i; diff --git a/arch/arm/probes/kprobes/test-core.h b/arch/arm/probes/kprobes/test-core.h index 56ad3c0aaeea..c7297037c162 100644 --- a/arch/arm/probes/kprobes/test-core.h +++ b/arch/arm/probes/kprobes/test-core.h @@ -454,3 +454,7 @@ void kprobe_thumb32_test_cases(void); #else void kprobe_arm_test_cases(void); #endif + +void __kprobes_test_case_start(void); +void __kprobes_test_case_end_16(void); +void __kprobes_test_case_end_32(void); diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index 9e74c7ff6b04..97e2bfa01f4b 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -7,7 +7,7 @@ # http://www.arm.linux.org.uk/developer/machines/download.php # # Please do not send patches to this file; it is automatically generated! -# To add an entry into this database, please see Documentation/arm/arm.rst, +# To add an entry into this database, please see Documentation/arch/arm/arm.rst, # or visit: # # http://www.arm.linux.org.uk/developer/machines/?action=new diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c index 1976c6f325a4..a003beacac76 100644 --- a/arch/arm/vdso/vgettimeofday.c +++ b/arch/arm/vdso/vgettimeofday.c @@ -6,6 +6,8 @@ */ #include <linux/time.h> #include <linux/types.h> +#include <asm/vdso.h> +#include <asm/unwind.h> int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 349dcb944a93..1ba5078c1025 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -25,6 +25,7 @@ #include <asm/thread_notify.h> #include <asm/traps.h> #include <asm/vfp.h> +#include <asm/neon.h> #include "vfpinstr.h" #include "vfp.h" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1201d25a8a4..a8d0bd4136db 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -207,6 +207,7 @@ config ARM64 select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KVM + select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -222,6 +223,7 @@ config ARM64 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_GENERIC_VDSO + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN @@ -577,7 +579,6 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" default y - select ARM64_MODULE_PLTS if MODULES help This option links the kernel with '--fix-cortex-a53-843419' and enables PLT support to replace certain ADRP instructions, which can @@ -1516,7 +1517,7 @@ config XEN # 16K | 27 | 14 | 13 | 11 | # 64K | 29 | 16 | 13 | 13 | config ARCH_FORCE_MAX_ORDER - int "Order of maximal physically contiguous allocations" if EXPERT && (ARM64_4K_PAGES || ARM64_16K_PAGES) + int default "13" if ARM64_64K_PAGES default "11" if ARM64_16K_PAGES default "10" @@ -1619,7 +1620,7 @@ config KUSER_HELPERS the system. This permits binaries to be run on ARMv4 through to ARMv8 without modification. - See Documentation/arm/kernel_user_helpers.rst for details. + See Documentation/arch/arm/kernel_user_helpers.rst for details. However, the fixed address nature of these helpers can be used by ROP (return orientated programming) authors when creating @@ -2107,26 +2108,6 @@ config ARM64_SME register state capable of holding two dimensional matrix tiles to enable various matrix operations. -config ARM64_MODULE_PLTS - bool "Use PLTs to allow module memory to spill over into vmalloc area" - depends on MODULES - select HAVE_MOD_ARCH_SPECIFIC - help - Allocate PLTs when loading modules so that jumps and calls whose - targets are too far away for their relative offsets to be encoded - in the instructions themselves can be bounced via veneers in the - module's PLT. This allows modules to be allocated in the generic - vmalloc area after the dedicated module memory area has been - exhausted. - - When running with address space randomization (KASLR), the module - region itself may be too far away for ordinary relative jumps and - calls, and so in that case, module PLTs are required and cannot be - disabled. - - Specific errata workaround(s) might also force module PLTs to be - enabled (ARM64_ERRATUM_843419). - config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" select ARM_GIC_V3 @@ -2167,7 +2148,6 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is @@ -2198,9 +2178,8 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the core kernel, so branch relocations are almost always in range unless - ARM64_MODULE_PLTS is enabled and the region is exhausted. In this - particular case of region exhaustion, modules might be able to fall - back to a larger 2GB area. + the region is exhausted. In this particular case of region + exhaustion, modules might be able to fall back to a larger 2GB area. config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/boot/dts/arm/foundation-v8.dtsi b/arch/arm64/boot/dts/arm/foundation-v8.dtsi index 029578072d8f..7b41537731a6 100644 --- a/arch/arm64/boot/dts/arm/foundation-v8.dtsi +++ b/arch/arm64/boot/dts/arm/foundation-v8.dtsi @@ -59,6 +59,7 @@ L2_0: l2-cache0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts index ef68f5aae7dd..afdf954206f1 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts +++ b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts @@ -72,6 +72,7 @@ L2_0: l2-cache0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts index 796cd7d02eb5..7bdeb965f0a9 100644 --- a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts +++ b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts @@ -58,6 +58,7 @@ L2_0: l2-cache0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 2209c1ac6e9b..e62a43591361 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -171,6 +171,7 @@ conn_subsys: bus@5b000000 { interrupt-names = "host", "peripheral", "otg", "wakeup"; phys = <&usb3_phy>; phy-names = "cdns3,usb3-phy"; + cdns,on-chip-buff-size = /bits/ 16 <18>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index 2dce8f2ee3ea..adb98a72bdfd 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -90,6 +90,8 @@ dma_subsys: bus@5a000000 { clocks = <&uart0_lpcg IMX_LPCG_CLK_4>, <&uart0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "baud"; + assigned-clocks = <&clk IMX_SC_R_UART_0 IMX_SC_PM_CLK_PER>; + assigned-clock-rates = <80000000>; power-domains = <&pd IMX_SC_R_UART_0>; status = "disabled"; }; @@ -100,6 +102,8 @@ dma_subsys: bus@5a000000 { clocks = <&uart1_lpcg IMX_LPCG_CLK_4>, <&uart1_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "baud"; + assigned-clocks = <&clk IMX_SC_R_UART_1 IMX_SC_PM_CLK_PER>; + assigned-clock-rates = <80000000>; power-domains = <&pd IMX_SC_R_UART_1>; status = "disabled"; }; @@ -110,6 +114,8 @@ dma_subsys: bus@5a000000 { clocks = <&uart2_lpcg IMX_LPCG_CLK_4>, <&uart2_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "baud"; + assigned-clocks = <&clk IMX_SC_R_UART_2 IMX_SC_PM_CLK_PER>; + assigned-clock-rates = <80000000>; power-domains = <&pd IMX_SC_R_UART_2>; status = "disabled"; }; @@ -120,6 +126,8 @@ dma_subsys: bus@5a000000 { clocks = <&uart3_lpcg IMX_LPCG_CLK_4>, <&uart3_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "baud"; + assigned-clocks = <&clk IMX_SC_R_UART_3 IMX_SC_PM_CLK_PER>; + assigned-clock-rates = <80000000>; power-domains = <&pd IMX_SC_R_UART_3>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi index 9e82069c941f..5a1f7c30afe5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi @@ -81,7 +81,7 @@ &ecspi2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_espi2>; - cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>; + cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>; status = "okay"; eeprom@0 { @@ -202,7 +202,7 @@ MX8MN_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82 MX8MN_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82 MX8MN_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82 - MX8MN_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41 + MX8MN_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index 67072e6c77d5..cbd9d124c80d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -98,11 +98,17 @@ #address-cells = <1>; #size-cells = <0>; - ethphy: ethernet-phy@4 { + ethphy: ethernet-phy@4 { /* AR8033 or ADIN1300 */ compatible = "ethernet-phy-ieee802.3-c22"; reg = <4>; reset-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>; reset-assert-us = <10000>; + /* + * Deassert delay: + * ADIN1300 requires 5ms. + * AR8033 requires 1ms. + */ + reset-deassert-us = <20000>; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index bd84db550053..8be8f090e8b8 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -1069,13 +1069,6 @@ <&clk IMX8MN_CLK_DISP_APB_ROOT>, <&clk IMX8MN_CLK_DISP_AXI_ROOT>; clock-names = "pix", "axi", "disp_axi"; - assigned-clocks = <&clk IMX8MN_CLK_DISP_PIXEL_ROOT>, - <&clk IMX8MN_CLK_DISP_AXI>, - <&clk IMX8MN_CLK_DISP_APB>; - assigned-clock-parents = <&clk IMX8MN_CLK_DISP_PIXEL>, - <&clk IMX8MN_SYS_PLL2_1000M>, - <&clk IMX8MN_SYS_PLL1_800M>; - assigned-clock-rates = <594000000>, <500000000>, <200000000>; interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>; power-domains = <&disp_blk_ctrl IMX8MN_DISPBLK_PD_LCDIF>; status = "disabled"; @@ -1093,12 +1086,6 @@ clocks = <&clk IMX8MN_CLK_DSI_CORE>, <&clk IMX8MN_CLK_DSI_PHY_REF>; clock-names = "bus_clk", "sclk_mipi"; - assigned-clocks = <&clk IMX8MN_CLK_DSI_CORE>, - <&clk IMX8MN_CLK_DSI_PHY_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_266M>, - <&clk IMX8MN_CLK_24M>; - assigned-clock-rates = <266000000>, <24000000>; - samsung,pll-clock-frequency = <24000000>; interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>; power-domains = <&disp_blk_ctrl IMX8MN_DISPBLK_PD_MIPI_DSI>; status = "disabled"; @@ -1142,6 +1129,21 @@ "lcdif-axi", "lcdif-apb", "lcdif-pix", "dsi-pclk", "dsi-ref", "csi-aclk", "csi-pclk"; + assigned-clocks = <&clk IMX8MN_CLK_DSI_CORE>, + <&clk IMX8MN_CLK_DSI_PHY_REF>, + <&clk IMX8MN_CLK_DISP_PIXEL>, + <&clk IMX8MN_CLK_DISP_AXI>, + <&clk IMX8MN_CLK_DISP_APB>; + assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_266M>, + <&clk IMX8MN_CLK_24M>, + <&clk IMX8MN_VIDEO_PLL1_OUT>, + <&clk IMX8MN_SYS_PLL2_1000M>, + <&clk IMX8MN_SYS_PLL1_800M>; + assigned-clock-rates = <266000000>, + <24000000>, + <594000000>, + <500000000>, + <200000000>; #power-domain-cells = <1>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index f81391993354..428c60462e3d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1211,13 +1211,6 @@ <&clk IMX8MP_CLK_MEDIA_APB_ROOT>, <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>; clock-names = "pix", "axi", "disp_axi"; - assigned-clocks = <&clk IMX8MP_CLK_MEDIA_DISP1_PIX_ROOT>, - <&clk IMX8MP_CLK_MEDIA_AXI>, - <&clk IMX8MP_CLK_MEDIA_APB>; - assigned-clock-parents = <&clk IMX8MP_CLK_MEDIA_DISP1_PIX>, - <&clk IMX8MP_SYS_PLL2_1000M>, - <&clk IMX8MP_SYS_PLL1_800M>; - assigned-clock-rates = <594000000>, <500000000>, <200000000>; interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>; power-domains = <&media_blk_ctrl IMX8MP_MEDIABLK_PD_LCDIF_1>; status = "disabled"; @@ -1237,11 +1230,6 @@ <&clk IMX8MP_CLK_MEDIA_APB_ROOT>, <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>; clock-names = "pix", "axi", "disp_axi"; - assigned-clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX>, - <&clk IMX8MP_VIDEO_PLL1>; - assigned-clock-parents = <&clk IMX8MP_VIDEO_PLL1_OUT>, - <&clk IMX8MP_VIDEO_PLL1_REF_SEL>; - assigned-clock-rates = <0>, <1039500000>; power-domains = <&media_blk_ctrl IMX8MP_MEDIABLK_PD_LCDIF_2>; status = "disabled"; @@ -1296,11 +1284,16 @@ "disp1", "disp2", "isp", "phy"; assigned-clocks = <&clk IMX8MP_CLK_MEDIA_AXI>, - <&clk IMX8MP_CLK_MEDIA_APB>; + <&clk IMX8MP_CLK_MEDIA_APB>, + <&clk IMX8MP_CLK_MEDIA_DISP1_PIX>, + <&clk IMX8MP_CLK_MEDIA_DISP2_PIX>, + <&clk IMX8MP_VIDEO_PLL1>; assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>, - <&clk IMX8MP_SYS_PLL1_800M>; - assigned-clock-rates = <500000000>, <200000000>; - + <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_VIDEO_PLL1_OUT>, + <&clk IMX8MP_VIDEO_PLL1_OUT>; + assigned-clock-rates = <500000000>, <200000000>, + <0>, <0>, <1039500000>; #power-domain-cells = <1>; lvds_bridge: bridge@5c { diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts index ce9d3f0b98fc..607cd6b4e972 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts @@ -82,8 +82,8 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; vmmc-supply = <®_usdhc2_vmmc>; - cd-gpios = <&lsio_gpio4 22 GPIO_ACTIVE_LOW>; - wp-gpios = <&lsio_gpio4 21 GPIO_ACTIVE_HIGH>; + cd-gpios = <&lsio_gpio5 22 GPIO_ACTIVE_LOW>; + wp-gpios = <&lsio_gpio5 21 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi b/arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi index 7264d784ae72..9af769ab8ceb 100644 --- a/arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi @@ -33,6 +33,12 @@ }; }; +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ext_io0>, <&pinctrl_hog0>, <&pinctrl_hog1>, + <&pinctrl_lpspi2_cs2>; +}; + /* Colibri SPI */ &lpspi2 { status = "okay"; diff --git a/arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi b/arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi index 5f30c88855e7..f8953067bc3b 100644 --- a/arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi @@ -48,8 +48,7 @@ <IMX8QXP_SAI0_TXFS_LSIO_GPIO0_IO28 0x20>, /* SODIMM 101 */ <IMX8QXP_SAI0_RXD_LSIO_GPIO0_IO27 0x20>, /* SODIMM 97 */ <IMX8QXP_ENET0_RGMII_RXC_LSIO_GPIO5_IO03 0x06000020>, /* SODIMM 85 */ - <IMX8QXP_SAI0_TXC_LSIO_GPIO0_IO26 0x20>, /* SODIMM 79 */ - <IMX8QXP_QSPI0A_DATA1_LSIO_GPIO3_IO10 0x06700041>; /* SODIMM 45 */ + <IMX8QXP_SAI0_TXC_LSIO_GPIO0_IO26 0x20>; /* SODIMM 79 */ }; pinctrl_uart1_forceoff: uart1forceoffgrp { diff --git a/arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi b/arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi index 7cad79102e1a..49d105eb4769 100644 --- a/arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi @@ -363,10 +363,6 @@ /* TODO VPU Encoder/Decoder */ &iomuxc { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ext_io0>, <&pinctrl_hog0>, <&pinctrl_hog1>, - <&pinctrl_hog2>, <&pinctrl_lpspi2_cs2>; - /* On-module touch pen-down interrupt */ pinctrl_ad7879_int: ad7879intgrp { fsl,pins = <IMX8QXP_MIPI_CSI0_I2C0_SCL_LSIO_GPIO3_IO05 0x21>; @@ -499,8 +495,7 @@ }; pinctrl_hog1: hog1grp { - fsl,pins = <IMX8QXP_CSI_MCLK_LSIO_GPIO3_IO01 0x20>, /* SODIMM 75 */ - <IMX8QXP_QSPI0A_SCLK_LSIO_GPIO3_IO16 0x20>; /* SODIMM 93 */ + fsl,pins = <IMX8QXP_QSPI0A_SCLK_LSIO_GPIO3_IO16 0x20>; /* SODIMM 93 */ }; pinctrl_hog2: hog2grp { @@ -774,3 +769,10 @@ fsl,pins = <IMX8QXP_SCU_BOOT_MODE3_SCU_DSC_RTC_CLOCK_OUTPUT_32K 0x20>; }; }; + +/* Delete peripherals which are not present on SOC, but are defined in imx8-ss-*.dtsi */ + +/delete-node/ &adc1; +/delete-node/ &adc1_lpcg; +/delete-node/ &dsp; +/delete-node/ &dsp_lpcg; diff --git a/arch/arm64/boot/dts/qcom/ipq5332.dtsi b/arch/arm64/boot/dts/qcom/ipq5332.dtsi index 12e0e179e139..af4d97143bcf 100644 --- a/arch/arm64/boot/dts/qcom/ipq5332.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq5332.dtsi @@ -73,6 +73,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 9ff4e9d45065..f531797f2619 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -83,7 +83,8 @@ L2_0: l2-cache { compatible = "cache"; - cache-level = <0x2>; + cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 84e715aa4310..5b2c1986c8f4 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -66,7 +66,8 @@ L2_0: l2-cache { compatible = "cache"; - cache-level = <0x2>; + cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/ipq9574.dtsi b/arch/arm64/boot/dts/qcom/ipq9574.dtsi index 3bb7435f5e7f..0ed19fbf7d87 100644 --- a/arch/arm64/boot/dts/qcom/ipq9574.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq9574.dtsi @@ -72,6 +72,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 7e0fa37a3adf..834e0b66b7f2 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -180,6 +180,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; idle-states { diff --git a/arch/arm64/boot/dts/qcom/msm8953.dtsi b/arch/arm64/boot/dts/qcom/msm8953.dtsi index 602cb188a635..d44cfa0471e9 100644 --- a/arch/arm64/boot/dts/qcom/msm8953.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8953.dtsi @@ -153,11 +153,13 @@ L2_0: l2-cache-0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; L2_1: l2-cache-1 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8976.dtsi b/arch/arm64/boot/dts/qcom/msm8976.dtsi index 1f0bd24a074a..f47fb8ea71e2 100644 --- a/arch/arm64/boot/dts/qcom/msm8976.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8976.dtsi @@ -193,11 +193,13 @@ l2_0: l2-cache0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; l2_1: l2-cache1 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 2831966be960..bdc3f2ba1755 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -52,6 +52,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -88,6 +89,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 2b35cb3f5292..30257c07e127 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -53,8 +53,9 @@ #cooling-cells = <2>; next-level-cache = <&L2_0>; L2_0: l2-cache { - compatible = "cache"; - cache-level = <2>; + compatible = "cache"; + cache-level = <2>; + cache-unified; }; }; @@ -83,8 +84,9 @@ #cooling-cells = <2>; next-level-cache = <&L2_1>; L2_1: l2-cache { - compatible = "cache"; - cache-level = <2>; + compatible = "cache"; + cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index b150437a8355..3ec941fed14f 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -146,6 +146,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -190,6 +191,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/qcm2290.dtsi b/arch/arm64/boot/dts/qcom/qcm2290.dtsi index ae5abc76bcc7..b29bc4e4b837 100644 --- a/arch/arm64/boot/dts/qcom/qcm2290.dtsi +++ b/arch/arm64/boot/dts/qcom/qcm2290.dtsi @@ -51,6 +51,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi index eefed585738c..972f753847e1 100644 --- a/arch/arm64/boot/dts/qcom/qcs404.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi @@ -95,6 +95,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; idle-states { diff --git a/arch/arm64/boot/dts/qcom/qdu1000.dtsi b/arch/arm64/boot/dts/qcom/qdu1000.dtsi index 734438113bba..fb553f0bb17a 100644 --- a/arch/arm64/boot/dts/qcom/qdu1000.dtsi +++ b/arch/arm64/boot/dts/qcom/qdu1000.dtsi @@ -35,9 +35,13 @@ next-level-cache = <&L2_0>; L2_0: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -54,6 +58,8 @@ next-level-cache = <&L2_100>; L2_100: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -70,6 +76,8 @@ next-level-cache = <&L2_200>; L2_200: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -86,6 +94,8 @@ next-level-cache = <&L2_300>; L2_300: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts index 339fea522509..15e1ae1c1a97 100644 --- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts @@ -7,7 +7,7 @@ #include <dt-bindings/regulator/qcom,rpmh-regulator.h> #include <dt-bindings/gpio/gpio.h> -#include "sm8150.dtsi" +#include "sa8155p.dtsi" #include "pmm8155au_1.dtsi" #include "pmm8155au_2.dtsi" diff --git a/arch/arm64/boot/dts/qcom/sa8155p.dtsi b/arch/arm64/boot/dts/qcom/sa8155p.dtsi new file mode 100644 index 000000000000..ffb7ab695213 --- /dev/null +++ b/arch/arm64/boot/dts/qcom/sa8155p.dtsi @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * Copyright (c) 2023, Linaro Limited + * + * SA8155P is an automotive variant of SM8150, with some minor changes. + * Most notably, the RPMhPD setup differs: MMCX and LCX/LMX rails are gone, + * though the cmd-db doesn't reflect that and access attemps result in a bite. + */ + +#include "sm8150.dtsi" + +&dispcc { + power-domains = <&rpmhpd SA8155P_CX>; +}; + +&mdss_dsi0 { + power-domains = <&rpmhpd SA8155P_CX>; +}; + +&mdss_dsi1 { + power-domains = <&rpmhpd SA8155P_CX>; +}; + +&mdss_mdp { + power-domains = <&rpmhpd SA8155P_CX>; +}; + +&remoteproc_slpi { + power-domains = <&rpmhpd SA8155P_CX>, + <&rpmhpd SA8155P_MX>; +}; + +&rpmhpd { + /* + * The bindings were crafted such that SA8155P PDs match their + * SM8150 counterparts to make it more maintainable and only + * necessitate adjusting entries that actually differ + */ + compatible = "qcom,sa8155p-rpmhpd"; +}; diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 2343df7e0ea4..c3310caf9f68 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -42,9 +42,13 @@ next-level-cache = <&L2_0>; L2_0: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -58,6 +62,8 @@ next-level-cache = <&L2_1>; L2_1: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -71,6 +77,8 @@ next-level-cache = <&L2_2>; L2_2: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -84,6 +92,8 @@ next-level-cache = <&L2_3>; L2_3: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -97,9 +107,13 @@ next-level-cache = <&L2_4>; L2_4: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_1>; L3_1: l3-cache { compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; @@ -114,6 +128,8 @@ next-level-cache = <&L2_5>; L2_5: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_1>; }; }; @@ -127,6 +143,8 @@ next-level-cache = <&L2_6>; L2_6: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_1>; }; }; @@ -140,6 +158,8 @@ next-level-cache = <&L2_7>; L2_7: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_1>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180-idp.dts b/arch/arm64/boot/dts/qcom/sc7180-idp.dts index 9f052270e090..299ef5dc225a 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-idp.dts +++ b/arch/arm64/boot/dts/qcom/sc7180-idp.dts @@ -393,6 +393,11 @@ qcom,spare-regs = <&tcsr_regs_2 0xb3e4>; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. */ + dma-coherent; +}; + &sdhc_1 { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi b/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi index d8ed1d7b4ec7..4b306a59d9be 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi @@ -16,3 +16,11 @@ &cpu6_opp12 { opp-peak-kBps = <8532000 23347200>; }; + +&cpu6_opp13 { + opp-peak-kBps = <8532000 23347200>; +}; + +&cpu6_opp14 { + opp-peak-kBps = <8532000 23347200>; +}; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index ca6920de7ea8..1472e7f10831 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -892,6 +892,11 @@ hp_i2c: &i2c9 { qcom,spare-regs = <&tcsr_regs_2 0xb3e4>; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. */ + dma-coherent; +}; + &sdhc_1 { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index ea1ffade1aa1..a65be760d1a7 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -92,10 +92,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -120,6 +122,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -144,6 +147,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -168,6 +172,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -192,6 +197,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -216,6 +222,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -240,6 +247,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -264,6 +272,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -360,7 +369,7 @@ }; firmware { - scm { + scm: scm { compatible = "qcom,scm-sc7180", "qcom,scm"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi index f562e4d2b655..2e1cd219fc18 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi @@ -79,6 +79,11 @@ firmware-name = "ath11k/WCN6750/hw1.0/wpss.mdt"; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. */ + dma-coherent; +}; + &wifi { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi index c6dc200c00ce..21027042cf13 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi @@ -480,7 +480,6 @@ wcd_rx: codec@0,4 { compatible = "sdw20217010d00"; reg = <0 4>; - #sound-dai-cells = <1>; qcom,rx-port-mapping = <1 2 3 4 5>; }; }; @@ -491,7 +490,6 @@ wcd_tx: codec@0,3 { compatible = "sdw20217010d00"; reg = <0 3>; - #sound-dai-cells = <1>; qcom,tx-port-mapping = <1 2 3 4>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi b/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi index 88b3586e389f..9137db066d9e 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi @@ -414,7 +414,6 @@ wcd_rx: codec@0,4 { compatible = "sdw20217010d00"; reg = <0 4>; - #sound-dai-cells = <1>; qcom,rx-port-mapping = <1 2 3 4 5>; }; }; @@ -423,7 +422,6 @@ wcd_tx: codec@0,3 { compatible = "sdw20217010d00"; reg = <0 3>; - #sound-dai-cells = <1>; qcom,tx-port-mapping = <1 2 3 4>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 31728f461422..36f0bb9b3cbb 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -182,10 +182,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -208,6 +210,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -230,6 +233,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -252,6 +256,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -274,6 +279,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -296,6 +302,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -318,6 +325,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -340,6 +348,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -647,7 +656,7 @@ }; firmware { - scm { + scm: scm { compatible = "qcom,scm-sc7280", "qcom,scm"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 8fa9fbfe5d00..cc4aef21e617 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -58,10 +58,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -83,6 +85,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -104,6 +107,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -125,6 +129,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -146,6 +151,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -167,6 +173,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -188,6 +195,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -209,6 +217,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -2726,6 +2735,7 @@ pins = "gpio7"; function = "dmic1_data"; drive-strength = <8>; + input-enable; }; }; @@ -2743,6 +2753,7 @@ function = "dmic1_data"; drive-strength = <2>; bias-pull-down; + input-enable; }; }; @@ -2758,6 +2769,7 @@ pins = "gpio9"; function = "dmic2_data"; drive-strength = <8>; + input-enable; }; }; @@ -2775,6 +2787,7 @@ function = "dmic2_data"; drive-strength = <2>; bias-pull-down; + input-enable; }; }; @@ -3982,6 +3995,7 @@ qcom,tcs-config = <ACTIVE_TCS 2>, <SLEEP_TCS 3>, <WAKE_TCS 3>, <CONTROL_TCS 1>; label = "apps_rsc"; + power-domains = <&CLUSTER_PD>; apps_bcm_voter: bcm-voter { compatible = "qcom,bcm-voter"; diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi index 37e72b1c56dc..eaead2f7beb4 100644 --- a/arch/arm64/boot/dts/qcom/sdm630.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi @@ -63,6 +63,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -127,6 +128,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm670.dtsi b/arch/arm64/boot/dts/qcom/sdm670.dtsi index c5f839dd1c6e..b61e13db89bd 100644 --- a/arch/arm64/boot/dts/qcom/sdm670.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm670.dtsi @@ -41,8 +41,12 @@ L2_0: l2-cache { compatible = "cache"; next-level-cache = <&L3_0>; + cache-level = <2>; + cache-unified; L3_0: l3-cache { - compatible = "cache"; + compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -57,6 +61,8 @@ next-level-cache = <&L2_100>; L2_100: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -71,6 +77,8 @@ next-level-cache = <&L2_200>; L2_200: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -85,6 +93,8 @@ next-level-cache = <&L2_300>; L2_300: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -99,6 +109,8 @@ next-level-cache = <&L2_400>; L2_400: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -113,6 +125,8 @@ next-level-cache = <&L2_500>; L2_500: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -127,6 +141,8 @@ next-level-cache = <&L2_600>; L2_600: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -141,6 +157,8 @@ next-level-cache = <&L2_700>; L2_700: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 90424442bb4a..cdeb05e95674 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -108,10 +108,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -135,6 +137,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -158,6 +161,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -181,6 +185,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -204,6 +209,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -227,6 +233,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -250,6 +257,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -273,6 +281,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi index 631ca327e064..43f31c1b9d5a 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -50,6 +50,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -102,6 +103,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6125.dtsi b/arch/arm64/boot/dts/qcom/sm6125.dtsi index 9484752fb850..2aa093d16858 100644 --- a/arch/arm64/boot/dts/qcom/sm6125.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6125.dtsi @@ -47,6 +47,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -87,6 +88,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6350.dtsi b/arch/arm64/boot/dts/qcom/sm6350.dtsi index 18c4616848ce..ad34301f6cdd 100644 --- a/arch/arm64/boot/dts/qcom/sm6350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6350.dtsi @@ -60,10 +60,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -86,6 +88,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -108,6 +111,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -130,6 +134,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -152,6 +157,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -174,6 +180,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -196,6 +203,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -218,6 +226,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts b/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts index 8220e6f44117..b2f1bb1d58e9 100644 --- a/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts +++ b/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts @@ -178,12 +178,12 @@ }; &remoteproc_adsp { - firmware-name = "qcom/Sony/murray/adsp.mbn"; + firmware-name = "qcom/sm6375/Sony/murray/adsp.mbn"; status = "okay"; }; &remoteproc_cdsp { - firmware-name = "qcom/Sony/murray/cdsp.mbn"; + firmware-name = "qcom/sm6375/Sony/murray/cdsp.mbn"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi index ae9b6bc446cb..f8d9c34d3b2f 100644 --- a/arch/arm64/boot/dts/qcom/sm6375.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi @@ -48,10 +48,14 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_0: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; + compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -68,8 +72,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_100: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -85,8 +91,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_200: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -102,8 +110,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_300: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -119,8 +129,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_400: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -136,8 +148,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_500: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -153,8 +167,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_600: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -170,8 +186,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_700: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 2273fa571988..27dcda0d4288 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -63,10 +63,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -90,6 +92,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -113,6 +116,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -136,6 +140,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -159,6 +164,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -182,6 +188,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -205,6 +212,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -228,6 +236,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts index 8b2ae39950ff..de6101ddebe7 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts +++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts @@ -13,6 +13,6 @@ }; &display_panel { - compatible = "xiaomi,elish-boe-nt36523"; + compatible = "xiaomi,elish-boe-nt36523", "novatek,nt36523"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts index a4d5341495cf..4cffe9c703df 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts +++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts @@ -13,6 +13,6 @@ }; &display_panel { - compatible = "xiaomi,elish-csot-nt36523"; + compatible = "xiaomi,elish-csot-nt36523", "novatek,nt36523"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index ebcb481571c2..3efdc03ed0f1 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -58,12 +58,14 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_0: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -80,9 +82,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_100: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -98,9 +101,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_200: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -116,9 +120,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_300: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -134,9 +139,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_400: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -152,9 +158,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_500: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -170,9 +177,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_600: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -188,9 +196,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_700: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 595533aeafc4..d59ea8ee7111 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -57,12 +57,14 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 0>; L2_0: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -79,9 +81,10 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 0>; L2_100: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -97,9 +100,10 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 0>; L2_200: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -115,9 +119,10 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 0>; L2_300: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -133,9 +138,10 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 1>; L2_400: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -151,9 +157,10 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 1>; L2_500: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -169,9 +176,10 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 1>; L2_600: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; @@ -187,9 +195,10 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 2>; L2_700: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 6e9bad8f6f33..558cbc430708 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -80,10 +80,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -104,6 +106,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -124,6 +127,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -144,6 +148,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -164,6 +169,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -184,6 +190,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -204,6 +211,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -224,6 +232,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -2022,7 +2031,7 @@ qcom,din-ports = <4>; qcom,dout-ports = <9>; - qcom,ports-sinterval = <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>; + qcom,ports-sinterval = /bits/ 16 <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>; qcom,ports-offset1 = /bits/ 8 <0x01 0x03 0x05 0x02 0x04 0x15 0x00 0xff 0xff 0x06 0x0d 0xff 0x00>; qcom,ports-offset2 = /bits/ 8 <0xff 0x07 0x1f 0xff 0x07 0x1f 0xff 0xff 0xff 0xff 0xff 0xff 0xff>; qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>; @@ -2068,7 +2077,7 @@ qcom,din-ports = <0>; qcom,dout-ports = <10>; - qcom,ports-sinterval = <0x03 0x3f 0x1f 0x07 0x00 0x18f 0xff 0xff 0xff 0xff>; + qcom,ports-sinterval = /bits/ 16 <0x03 0x3f 0x1f 0x07 0x00 0x18f 0xff 0xff 0xff 0xff>; qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0b 0x01 0x00 0x00 0xff 0xff 0xff 0xff>; qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0b 0x00 0x00 0x00 0xff 0xff 0xff 0xff>; qcom,ports-hstart = /bits/ 8 <0xff 0x03 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff>; @@ -2133,7 +2142,7 @@ qcom,din-ports = <4>; qcom,dout-ports = <9>; - qcom,ports-sinterval = <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>; + qcom,ports-sinterval = /bits/ 16 <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>; qcom,ports-offset1 = /bits/ 8 <0x01 0x03 0x05 0x02 0x04 0x15 0x00 0xff 0xff 0x06 0x0d 0xff 0x00>; qcom,ports-offset2 = /bits/ 8 <0xff 0x07 0x1f 0xff 0x07 0x1f 0xff 0xff 0xff 0xff 0xff 0xff 0xff>; qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>; @@ -3762,9 +3771,16 @@ system-cache-controller@25000000 { compatible = "qcom,sm8550-llcc"; - reg = <0 0x25000000 0 0x800000>, + reg = <0 0x25000000 0 0x200000>, + <0 0x25200000 0 0x200000>, + <0 0x25400000 0 0x200000>, + <0 0x25600000 0 0x200000>, <0 0x25800000 0 0x200000>; - reg-names = "llcc_base", "llcc_broadcast_base"; + reg-names = "llcc0_base", + "llcc1_base", + "llcc2_base", + "llcc3_base", + "llcc_broadcast_base"; interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi index dd228a256a32..2ae4bb7d5e62 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi @@ -97,6 +97,7 @@ l2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index f69a38f42d2d..0a27fa5271f5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -37,7 +37,8 @@ vin-supply = <&vcc_io>; }; - vcc_host_5v: vcc-host-5v-regulator { + /* Common enable line for all of the rails mentioned in the labels */ + vcc_host_5v: vcc_host1_5v: vcc_otg_5v: vcc-host-5v-regulator { compatible = "regulator-fixed"; gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; @@ -48,17 +49,6 @@ vin-supply = <&vcc_sys>; }; - vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator { - compatible = "regulator-fixed"; - gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; - pinctrl-names = "default"; - pinctrl-0 = <&usb20_host_drv>; - regulator-name = "vcc_host1_5v"; - regulator-always-on; - regulator-boot-on; - vin-supply = <&vcc_sys>; - }; - vcc_sys: vcc-sys { compatible = "regulator-fixed"; regulator-name = "vcc_sys"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 6d7a7bf72ac7..e729e7a22b23 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -103,6 +103,7 @@ l2: l2-cache0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts index 263ce40770dd..cddf6cd2fecb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts @@ -28,6 +28,16 @@ regulator-max-microvolt = <5000000>; vin-supply = <&vcc12v_dcin>; }; + + vcc_sd_pwr: vcc-sd-pwr-regulator { + compatible = "regulator-fixed"; + regulator-name = "vcc_sd_pwr"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + vin-supply = <&vcc3v3_sys>; + }; }; /* phy for pcie */ @@ -130,13 +140,7 @@ }; &sdmmc0 { - vmmc-supply = <&sdmmc_pwr>; - status = "okay"; -}; - -&sdmmc_pwr { - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; + vmmc-supply = <&vcc_sd_pwr>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi index 102e448bc026..31aa2b8efe39 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi @@ -104,16 +104,6 @@ regulator-max-microvolt = <3300000>; vin-supply = <&vcc5v0_sys>; }; - - sdmmc_pwr: sdmmc-pwr-regulator { - compatible = "regulator-fixed"; - enable-active-high; - gpio = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&sdmmc_pwr_h>; - regulator-name = "sdmmc_pwr"; - status = "disabled"; - }; }; &cpu0 { @@ -155,6 +145,19 @@ status = "disabled"; }; +&gpio0 { + nextrst-hog { + gpio-hog; + /* + * GPIO_ACTIVE_LOW + output-low here means that the pin is set + * to high, because output-low decides the value pre-inversion. + */ + gpios = <RK_PA5 GPIO_ACTIVE_LOW>; + line-name = "nEXTRST"; + output-low; + }; +}; + &gpu { mali-supply = <&vdd_gpu>; status = "okay"; @@ -538,12 +541,6 @@ rockchip,pins = <2 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; }; }; - - sdmmc-pwr { - sdmmc_pwr_h: sdmmc-pwr-h { - rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>; - }; - }; }; &pmu_io_domains { diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts index f70ca9f0470a..c718b8dbb9c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts @@ -106,7 +106,7 @@ rockchip-key { reset_button_pin: reset-button-pin { - rockchip,pins = <4 RK_PA0 RK_FUNC_GPIO &pcfg_pull_up>; + rockchip,pins = <0 RK_PB7 RK_FUNC_GPIO &pcfg_pull_up>; }; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts index 2a1118f15c29..b6ad8328c7eb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts @@ -134,4 +134,3 @@ }; }; }; - diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index ba67b58f05b7..f1be76a54ceb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -94,9 +94,10 @@ power-domains = <&power RK3568_PD_PIPE>; reg = <0x3 0xc0400000 0x0 0x00400000>, <0x0 0xfe270000 0x0 0x00010000>, - <0x3 0x7f000000 0x0 0x01000000>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0x7ef00000 0x0 0x00100000>, - <0x02000000 0x0 0x00000000 0x3 0x40000000 0x0 0x3ef00000>; + <0x0 0xf2000000 0x0 0x00100000>; + ranges = <0x01000000 0x0 0xf2100000 0x0 0xf2100000 0x0 0x00100000>, + <0x02000000 0x0 0xf2200000 0x0 0xf2200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x40000000 0x0 0x40000000>; reg-names = "dbi", "apb", "config"; resets = <&cru SRST_PCIE30X1_POWERUP>; reset-names = "pipe"; @@ -146,9 +147,10 @@ power-domains = <&power RK3568_PD_PIPE>; reg = <0x3 0xc0800000 0x0 0x00400000>, <0x0 0xfe280000 0x0 0x00010000>, - <0x3 0xbf000000 0x0 0x01000000>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0xbef00000 0x0 0x00100000>, - <0x02000000 0x0 0x00000000 0x3 0x80000000 0x0 0x3ef00000>; + <0x0 0xf0000000 0x0 0x00100000>; + ranges = <0x01000000 0x0 0xf0100000 0x0 0xf0100000 0x0 0x00100000>, + <0x02000000 0x0 0xf0200000 0x0 0xf0200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x80000000 0x0 0x40000000>; reg-names = "dbi", "apb", "config"; resets = <&cru SRST_PCIE30X2_POWERUP>; reset-names = "pipe"; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index f62e0fd881a9..61680c7ac489 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -952,7 +952,7 @@ compatible = "rockchip,rk3568-pcie"; reg = <0x3 0xc0000000 0x0 0x00400000>, <0x0 0xfe260000 0x0 0x00010000>, - <0x3 0x3f000000 0x0 0x01000000>; + <0x0 0xf4000000 0x0 0x00100000>; reg-names = "dbi", "apb", "config"; interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>, @@ -982,8 +982,9 @@ phys = <&combphy2 PHY_TYPE_PCIE>; phy-names = "pcie-phy"; power-domains = <&power RK3568_PD_PIPE>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0x3ef00000 0x0 0x00100000 - 0x02000000 0x0 0x00000000 0x3 0x00000000 0x0 0x3ef00000>; + ranges = <0x01000000 0x0 0xf4100000 0x0 0xf4100000 0x0 0x00100000>, + <0x02000000 0x0 0xf4200000 0x0 0xf4200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x00000000 0x0 0x40000000>; resets = <&cru SRST_PCIE20_POWERUP>; reset-names = "pipe"; #address-cells = <3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 657c019d27fa..a3124bd2e092 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -229,6 +229,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -238,6 +239,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -247,6 +249,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -256,6 +259,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -265,6 +269,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -274,6 +279,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -283,6 +289,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -292,6 +299,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -301,6 +309,7 @@ cache-line-size = <64>; cache-sets = <4096>; cache-level = <3>; + cache-unified; }; }; diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index a406454578f0..f1b8a04ee9f2 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -67,7 +67,7 @@ static int __init hyperv_init(void) if (ret) return ret; - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online", + ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "arm64/hyperv_init:online", hv_common_cpu_init, hv_common_cpu_die); if (ret < 0) { hv_common_free(); diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index bdf1f6bcd010..94b486192e1f 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -23,17 +23,17 @@ #include <linux/stringify.h> -#define ALTINSTR_ENTRY(feature) \ +#define ALTINSTR_ENTRY(cpucap) \ " .word 661b - .\n" /* label */ \ " .word 663f - .\n" /* new instruction */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .hword " __stringify(cpucap) "\n" /* cpucap */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ -#define ALTINSTR_ENTRY_CB(feature, cb) \ +#define ALTINSTR_ENTRY_CB(cpucap, cb) \ " .word 661b - .\n" /* label */ \ - " .word " __stringify(cb) "- .\n" /* callback */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .word " __stringify(cb) "- .\n" /* callback */ \ + " .hword " __stringify(cpucap) "\n" /* cpucap */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -53,13 +53,13 @@ * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature) \ + ALTINSTR_ENTRY(cpucap) \ ".popsection\n" \ ".subsection 1\n" \ "663:\n\t" \ @@ -70,31 +70,31 @@ ".previous\n" \ ".endif\n" -#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY_CB(feature, cb) \ + ALTINSTR_ENTRY_CB(cpucap, cb) \ ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ ".endif\n" -#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) +#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...) \ + __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg)) -#define ALTERNATIVE_CB(oldinstr, feature, cb) \ - __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (feature), 1, cb) +#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \ + __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb) #else #include <asm/assembler.h> -.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len +.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len .word \orig_offset - . .word \alt_offset - . - .hword (\feature) + .hword (\cpucap) .byte \orig_len .byte \alt_len .endm @@ -210,9 +210,9 @@ alternative_endif #endif /* __ASSEMBLY__ */ /* - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap)); * - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO)); * N.B. If CONFIG_FOO is specified, but not selected, the whole block * will be omitted, including oldinstr. */ @@ -224,15 +224,15 @@ alternative_endif #include <linux/types.h> static __always_inline bool -alternative_has_feature_likely(const unsigned long feature) +alternative_has_cap_likely(const unsigned long cpucap) { - compiletime_assert(feature < ARM64_NCAPS, - "feature must be < ARM64_NCAPS"); + compiletime_assert(cpucap < ARM64_NCAPS, + "cpucap must be < ARM64_NCAPS"); asm_volatile_goto( - ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) + ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops) : - : [feature] "i" (feature) + : [cpucap] "i" (cpucap) : : l_no); @@ -242,15 +242,15 @@ l_no: } static __always_inline bool -alternative_has_feature_unlikely(const unsigned long feature) +alternative_has_cap_unlikely(const unsigned long cpucap) { - compiletime_assert(feature < ARM64_NCAPS, - "feature must be < ARM64_NCAPS"); + compiletime_assert(cpucap < ARM64_NCAPS, + "cpucap must be < ARM64_NCAPS"); asm_volatile_goto( - ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) + ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap]) : - : [feature] "i" (feature) + : [cpucap] "i" (cpucap) : : l_yes); diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a38b92e11811..00d97b8a757f 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -13,7 +13,7 @@ struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ - u16 cpufeature; /* cpufeature bit set for replacement */ + u16 cpucap; /* cpucap bit set for replacement */ u8 orig_len; /* size of original instruction(s) */ u8 alt_len; /* size of new instruction(s), <= orig_len */ }; @@ -23,7 +23,7 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, void __init apply_boot_alternatives(void); void __init apply_alternatives_all(void); -bool alternative_is_applied(u16 cpufeature); +bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES void apply_alternatives_module(void *start, size_t length); @@ -31,5 +31,8 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif +void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index af1fafbe7e1d..934c658ee947 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void) #define arch_timer_reg_read_stable(reg) \ ({ \ - u64 _val; \ - \ - preempt_disable_notrace(); \ - _val = erratum_handler(read_ ## reg)(); \ - preempt_enable_notrace(); \ - \ - _val; \ + erratum_handler(read_ ## reg)(); \ }) /* diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 2f5f3da34782..b0abc64f86b0 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -129,4 +129,6 @@ static inline bool __init __early_cpu_has_rndr(void) return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf; } +u64 kaslr_early_init(void *fdt); + #endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index d6b51deb7bf0..18dc2fb3d7b7 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -13,7 +13,7 @@ #define RETURN_READ_PMEVCNTRN(n) \ return read_sysreg(pmevcntr##n##_el0) -static unsigned long read_pmevcntrn(int n) +static inline unsigned long read_pmevcntrn(int n) { PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); return 0; @@ -21,14 +21,14 @@ static unsigned long read_pmevcntrn(int n) #define WRITE_PMEVCNTRN(n) \ write_sysreg(val, pmevcntr##n##_el0) -static void write_pmevcntrn(int n, unsigned long val) +static inline void write_pmevcntrn(int n, unsigned long val) { PMEVN_SWITCH(n, WRITE_PMEVCNTRN); } #define WRITE_PMEVTYPERN(n) \ write_sysreg(val, pmevtyper##n##_el0) -static void write_pmevtypern(int n, unsigned long val) +static inline void write_pmevtypern(int n, unsigned long val) { PMEVN_SWITCH(n, WRITE_PMEVTYPERN); } diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 75b211c98dea..5b6efe8abeeb 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -18,7 +18,6 @@ bic \tmp1, \tmp1, #TTBR_ASID_MASK sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 - isb add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET msr ttbr1_el1, \tmp1 // set reserved ASID isb @@ -31,7 +30,6 @@ extr \tmp2, \tmp2, \tmp1, #48 ror \tmp2, \tmp2, #16 msr ttbr1_el1, \tmp2 // set the active ASID - isb msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 isb .endm diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index c9979273d389..400d279e0f8d 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -142,24 +142,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) #define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release #define arch_atomic_fetch_xor arch_atomic_fetch_xor -#define arch_atomic_xchg_relaxed(v, new) \ - arch_xchg_relaxed(&((v)->counter), (new)) -#define arch_atomic_xchg_acquire(v, new) \ - arch_xchg_acquire(&((v)->counter), (new)) -#define arch_atomic_xchg_release(v, new) \ - arch_xchg_release(&((v)->counter), (new)) -#define arch_atomic_xchg(v, new) \ - arch_xchg(&((v)->counter), (new)) - -#define arch_atomic_cmpxchg_relaxed(v, old, new) \ - arch_cmpxchg_relaxed(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg_acquire(v, old, new) \ - arch_cmpxchg_acquire(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg_release(v, old, new) \ - arch_cmpxchg_release(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg(v, old, new) \ - arch_cmpxchg(&((v)->counter), (old), (new)) - #define arch_atomic_andnot arch_atomic_andnot /* @@ -209,16 +191,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) #define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor -#define arch_atomic64_xchg_relaxed arch_atomic_xchg_relaxed -#define arch_atomic64_xchg_acquire arch_atomic_xchg_acquire -#define arch_atomic64_xchg_release arch_atomic_xchg_release -#define arch_atomic64_xchg arch_atomic_xchg - -#define arch_atomic64_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed -#define arch_atomic64_cmpxchg_acquire arch_atomic_cmpxchg_acquire -#define arch_atomic64_cmpxchg_release arch_atomic_cmpxchg_release -#define arch_atomic64_cmpxchg arch_atomic_cmpxchg - #define arch_atomic64_andnot arch_atomic64_andnot #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index cbb3d961123b..89d2ba272359 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -294,38 +294,46 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L) #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name, mb, rel, cl) \ -static __always_inline long \ -__ll_sc__cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __CMPXCHG128(name, mb, rel, cl...) \ +static __always_inline u128 \ +__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \ { \ - unsigned long tmp, ret; \ + union __u128_halves r, o = { .full = (old) }, \ + n = { .full = (new) }; \ + unsigned int tmp; \ \ - asm volatile("// __cmpxchg_double" #name "\n" \ - " prfm pstl1strm, %2\n" \ - "1: ldxp %0, %1, %2\n" \ - " eor %0, %0, %3\n" \ - " eor %1, %1, %4\n" \ - " orr %1, %0, %1\n" \ - " cbnz %1, 2f\n" \ - " st" #rel "xp %w0, %5, %6, %2\n" \ - " cbnz %w0, 1b\n" \ + asm volatile("// __cmpxchg128" #name "\n" \ + " prfm pstl1strm, %[v]\n" \ + "1: ldxp %[rl], %[rh], %[v]\n" \ + " cmp %[rl], %[ol]\n" \ + " ccmp %[rh], %[oh], 0, eq\n" \ + " b.ne 2f\n" \ + " st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \ + " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ "2:" \ - : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \ - : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ - : cl); \ + : [v] "+Q" (*(u128 *)ptr), \ + [rl] "=&r" (r.low), [rh] "=&r" (r.high), \ + [tmp] "=&r" (tmp) \ + : [ol] "r" (o.low), [oh] "r" (o.high), \ + [nl] "r" (n.low), [nh] "r" (n.high) \ + : "cc", ##cl); \ \ - return ret; \ + return r.full; \ } -__CMPXCHG_DBL( , , , ) -__CMPXCHG_DBL(_mb, dmb ish, l, "memory") +__CMPXCHG128( , , ) +__CMPXCHG128(_mb, dmb ish, l, "memory") + +#undef __CMPXCHG128 -#undef __CMPXCHG_DBL #undef K #endif /* __ASM_ATOMIC_LL_SC_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 319958b95cfd..87f568a94e55 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -281,40 +281,35 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory") #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name, mb, cl...) \ -static __always_inline long \ -__lse__cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +#define __CMPXCHG128(name, mb, cl...) \ +static __always_inline u128 \ +__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \ { \ - unsigned long oldval1 = old1; \ - unsigned long oldval2 = old2; \ - register unsigned long x0 asm ("x0") = old1; \ - register unsigned long x1 asm ("x1") = old2; \ - register unsigned long x2 asm ("x2") = new1; \ - register unsigned long x3 asm ("x3") = new2; \ + union __u128_halves r, o = { .full = (old) }, \ + n = { .full = (new) }; \ + register unsigned long x0 asm ("x0") = o.low; \ + register unsigned long x1 asm ("x1") = o.high; \ + register unsigned long x2 asm ("x2") = n.low; \ + register unsigned long x3 asm ("x3") = n.high; \ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ \ asm volatile( \ __LSE_PREAMBLE \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ - " eor %[old1], %[old1], %[oldval1]\n" \ - " eor %[old2], %[old2], %[oldval2]\n" \ - " orr %[old1], %[old1], %[old2]" \ : [old1] "+&r" (x0), [old2] "+&r" (x1), \ - [v] "+Q" (*(__uint128_t *)ptr) \ + [v] "+Q" (*(u128 *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ - [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ + [oldval1] "r" (o.low), [oldval2] "r" (o.high) \ : cl); \ \ - return x0; \ + r.low = x0; r.high = x1; \ + \ + return r.full; \ } -__CMPXCHG_DBL( , ) -__CMPXCHG_DBL(_mb, al, "memory") +__CMPXCHG128( , ) +__CMPXCHG128(_mb, al, "memory") -#undef __CMPXCHG_DBL +#undef __CMPXCHG128 #endif /* __ASM_ATOMIC_LSE_H */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index c6bc5d8ec3ca..d7a540736741 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -130,21 +130,18 @@ __CMPXCHG_CASE(mb_, 64) #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name) \ -static inline long __cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +#define __CMPXCHG128(name) \ +static inline u128 __cmpxchg128##name(volatile u128 *ptr, \ + u128 old, u128 new) \ { \ - return __lse_ll_sc_body(_cmpxchg_double##name, \ - old1, old2, new1, new2, ptr); \ + return __lse_ll_sc_body(_cmpxchg128##name, \ + ptr, old, new); \ } -__CMPXCHG_DBL( ) -__CMPXCHG_DBL(_mb) +__CMPXCHG128( ) +__CMPXCHG128(_mb) -#undef __CMPXCHG_DBL +#undef __CMPXCHG128 #define __CMPXCHG_GEN(sfx) \ static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \ @@ -198,34 +195,17 @@ __CMPXCHG_GEN(_mb) #define arch_cmpxchg64 arch_cmpxchg #define arch_cmpxchg64_local arch_cmpxchg_local -/* cmpxchg_double */ -#define system_has_cmpxchg_double() 1 - -#define __cmpxchg_double_check(ptr1, ptr2) \ -({ \ - if (sizeof(*(ptr1)) != 8) \ - BUILD_BUG(); \ - VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \ -}) +/* cmpxchg128 */ +#define system_has_cmpxchg128() 1 -#define arch_cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ +#define arch_cmpxchg128(ptr, o, n) \ ({ \ - int __ret; \ - __cmpxchg_double_check(ptr1, ptr2); \ - __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2), \ - ptr1); \ - __ret; \ + __cmpxchg128_mb((ptr), (o), (n)); \ }) -#define arch_cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ +#define arch_cmpxchg128_local(ptr, o, n) \ ({ \ - int __ret; \ - __cmpxchg_double_check(ptr1, ptr2); \ - __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2), \ - ptr1); \ - __ret; \ + __cmpxchg128((ptr), (o), (n)); \ }) #define __CMPWAIT_CASE(w, sfx, sz) \ diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 74575c3d6987..ae904a1ad529 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -96,6 +96,8 @@ static inline int is_compat_thread(struct thread_info *thread) return test_ti_thread_flag(thread, TIF_32BIT); } +long compat_arm_syscall(struct pt_regs *regs, int scno); + #else /* !CONFIG_COMPAT */ static inline int is_compat_thread(struct thread_info *thread) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index fd7a92219eea..e749838b9c5d 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -56,6 +56,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; + u64 reg_id_aa64mmfr3; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; u64 reg_id_aa64zfr0; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 6bf013fb110d..7a95c324e52a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -107,7 +107,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * CPU capabilities: * * We use arm64_cpu_capabilities to represent system features, errata work - * arounds (both used internally by kernel and tracked in cpu_hwcaps) and + * arounds (both used internally by kernel and tracked in system_cpucaps) and * ELF HWCAPs (which are exposed to user). * * To support systems with heterogeneous CPUs, we need to make sure that we @@ -419,12 +419,12 @@ static __always_inline bool is_hyp_code(void) return is_vhe_hyp_code() || is_nvhe_hyp_code(); } -extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); -extern DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); +extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); #define for_each_available_cap(cap) \ - for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) + for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS) bool this_cpu_has_cap(unsigned int cap); void cpu_set_feature(unsigned int num); @@ -437,7 +437,7 @@ unsigned long cpu_get_elf_hwcap2(void); static __always_inline bool system_capabilities_finalized(void) { - return alternative_has_feature_likely(ARM64_ALWAYS_SYSTEM); + return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM); } /* @@ -449,7 +449,7 @@ static __always_inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) return false; - return arch_test_bit(num, cpu_hwcaps); + return arch_test_bit(num, system_cpucaps); } /* @@ -464,7 +464,7 @@ static __always_inline bool __cpus_have_const_cap(int num) { if (num >= ARM64_NCAPS) return false; - return alternative_has_feature_unlikely(num); + return alternative_has_cap_unlikely(num); } /* @@ -504,16 +504,6 @@ static __always_inline bool cpus_have_const_cap(int num) return cpus_have_cap(num); } -static inline void cpus_set_cap(unsigned int num) -{ - if (num >= ARM64_NCAPS) { - pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", - num, ARM64_NCAPS); - } else { - __set_bit(num, cpu_hwcaps); - } -} - static inline int __attribute_const__ cpuid_feature_extract_signed_field_width(u64 features, int field, int width) { diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 683ca3af4084..5f6f84837a49 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -126,6 +126,10 @@ #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 #define APPLE_CPU_PART_M2_BLIZZARD 0x032 #define APPLE_CPU_PART_M2_AVALANCHE 0x033 +#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034 +#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035 +#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038 +#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039 #define AMPERE_CPU_PART_AMPERE1 0xAC3 @@ -181,6 +185,10 @@ #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) #define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD) #define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE) +#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO) +#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO) +#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) +#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index f86b157a5da3..ef46f2daca62 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -166,4 +166,6 @@ static inline void efi_capsule_flush_cache_range(void *addr, int size) dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size); } +efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 037724b19c5c..f4c3d30bf746 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -22,6 +22,15 @@ isb .endm +.macro __init_el2_hcrx + mrs x0, id_aa64mmfr1_el1 + ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 + cbz x0, .Lskip_hcrx_\@ + mov_q x0, HCRX_HOST_FLAGS + msr_s SYS_HCRX_EL2, x0 +.Lskip_hcrx_\@: +.endm + /* * Allow Non-secure EL1 and EL0 to access physical timer and counter. * This is not necessary for VHE, since the host kernel runs in EL2, @@ -69,7 +78,7 @@ cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present mrs_s x0, SYS_TRBIDR_EL1 - and x0, x0, TRBIDR_PROG + and x0, x0, TRBIDR_EL1_P cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2 mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) @@ -150,12 +159,21 @@ mov x0, xzr mrs x1, id_aa64pfr1_el1 ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4 - cbz x1, .Lset_fgt_\@ + cbz x1, .Lset_pie_fgt_\@ /* Disable nVHE traps of TPIDR2 and SMPRI */ orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK +.Lset_pie_fgt_\@: + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable trapping of PIR_EL1 / PIRE0_EL1 */ + orr x0, x0, #HFGxTR_EL2_nPIR_EL1 + orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1 + .Lset_fgt_\@: msr_s SYS_HFGRTR_EL2, x0 msr_s SYS_HFGWTR_EL2, x0 @@ -184,6 +202,7 @@ */ .macro init_el2_state __init_el2_sctlr + __init_el2_hcrx __init_el2_timers __init_el2_debug __init_el2_lor @@ -284,14 +303,6 @@ cbz x1, .Lskip_sme_\@ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal - - mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? - ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 - cbz x1, .Lskip_sme_\@ - - mrs_s x1, SYS_HCRX_EL2 - orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping - msr_s SYS_HCRX_EL2, x1 .Lskip_sme_\@: .endm diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 8487aec9b658..ae35939f395b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -47,7 +47,7 @@ #define ESR_ELx_EC_DABT_LOW (0x24) #define ESR_ELx_EC_DABT_CUR (0x25) #define ESR_ELx_EC_SP_ALIGN (0x26) -/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_MOPS (0x27) #define ESR_ELx_EC_FP_EXC32 (0x28) /* Unallocated EC: 0x29 - 0x2B */ #define ESR_ELx_EC_FP_EXC64 (0x2C) @@ -75,8 +75,11 @@ #define ESR_ELx_IL_SHIFT (25) #define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) -#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISS_MASK (GENMASK(24, 0)) #define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) +#define ESR_ELx_ISS2_SHIFT (32) +#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32)) +#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT) /* ISS field definitions shared by different classes */ #define ESR_ELx_WNR_SHIFT (6) @@ -140,6 +143,20 @@ #define ESR_ELx_CM_SHIFT (8) #define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) +/* ISS2 field definitions for Data Aborts */ +#define ESR_ELx_TnD_SHIFT (10) +#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT) +#define ESR_ELx_TagAccess_SHIFT (9) +#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT) +#define ESR_ELx_GCS_SHIFT (8) +#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT) +#define ESR_ELx_Overlay_SHIFT (6) +#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT) +#define ESR_ELx_DirtyBit_SHIFT (5) +#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT) +#define ESR_ELx_Xs_SHIFT (0) +#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) + /* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) @@ -356,6 +373,15 @@ #define ESR_ELx_SME_ISS_ZA_DISABLED 3 #define ESR_ELx_SME_ISS_ZT_DISABLED 4 +/* ISS field definitions for MOPS exceptions */ +#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) +#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18) +#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17) +#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16) +#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10) +#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) +#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index e73af709cb7a..ad688e157c9b 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -8,16 +8,11 @@ #define __ASM_EXCEPTION_H #include <asm/esr.h> -#include <asm/kprobes.h> #include <asm/ptrace.h> #include <linux/interrupt.h> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry -#else -#define __exception_irq_entry __kprobes -#endif static inline unsigned long disr_to_esr(u64 disr) { @@ -77,6 +72,7 @@ void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); void do_el0_fpac(struct pt_regs *regs, unsigned long esr); void do_el1_fpac(struct pt_regs *regs, unsigned long esr); +void do_el0_mops(struct pt_regs *regs, unsigned long esr); void do_serror(struct pt_regs *regs, unsigned long esr); void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index fa4c6ff3aa9b..84055329cd8b 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -154,4 +154,12 @@ static inline int get_num_wrps(void) ID_AA64DFR0_EL1_WRPs_SHIFT); } +#ifdef CONFIG_CPU_PM +extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); +#else +static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) +{ +} +#endif + #endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 5d45f19fda7f..692b1ec663b2 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -137,6 +137,7 @@ #define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32) #define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16) #define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) +#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 877495a0fd0c..51d92abf945e 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -22,13 +22,13 @@ * Generic IO read/write. These perform native-endian accesses. */ #define __raw_writeb __raw_writeb -static inline void __raw_writeb(u8 val, volatile void __iomem *addr) +static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr) { asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writew __raw_writew -static inline void __raw_writew(u16 val, volatile void __iomem *addr) +static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr) { asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr)); } @@ -40,13 +40,13 @@ static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr) } #define __raw_writeq __raw_writeq -static inline void __raw_writeq(u64 val, volatile void __iomem *addr) +static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr) { asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_readb __raw_readb -static inline u8 __raw_readb(const volatile void __iomem *addr) +static __always_inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; asm volatile(ALTERNATIVE("ldrb %w0, [%1]", @@ -57,7 +57,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr) } #define __raw_readw __raw_readw -static inline u16 __raw_readw(const volatile void __iomem *addr) +static __always_inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; @@ -80,7 +80,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr) } #define __raw_readq __raw_readq -static inline u64 __raw_readq(const volatile void __iomem *addr) +static __always_inline u64 __raw_readq(const volatile void __iomem *addr) { u64 val; asm volatile(ALTERNATIVE("ldr %0, [%1]", diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index e0f5f6b73edd..1f31ec146d16 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -24,7 +24,7 @@ static __always_inline bool __irqflags_uses_pmr(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && - alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING); + alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING); } static __always_inline void __daif_local_irq_enable(void) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 186dd7f85b14..577773870b66 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -107,14 +107,14 @@ /* * Initial memory map attributes. */ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN) #ifdef CONFIG_ARM64_4K_PAGES -#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE) #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY) #else -#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE) #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) #endif diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index baef29fcbeee..c6e12e8f2751 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -9,6 +9,7 @@ #include <asm/esr.h> #include <asm/memory.h> +#include <asm/sysreg.h> #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ @@ -92,6 +93,9 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) +#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) + /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 43c3bc0f9544..86042afa86c3 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -267,6 +267,24 @@ extern u64 __kvm_get_mdcr_el2(void); __kvm_at_err; \ } ) +void __noreturn hyp_panic(void); +asmlinkage void kvm_unexpected_el2_exception(void); +asmlinkage void __noreturn hyp_panic(void); +asmlinkage void __noreturn hyp_panic_bad_stack(void); +asmlinkage void kvm_unexpected_el2_exception(void); +struct kvm_cpu_context; +void handle_trap(struct kvm_cpu_context *host_ctxt); +asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on); +void __noreturn __pkvm_init_finalise(void); +void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); +void kvm_patch_vector_branch(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_get_kimage_voffset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_compute_final_ctr_el0(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, + u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar); #else /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7e7e19ef6993..d48609d95423 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -279,6 +279,7 @@ enum vcpu_sysreg { TTBR0_EL1, /* Translation Table Base Register 0 */ TTBR1_EL1, /* Translation Table Base Register 1 */ TCR_EL1, /* Translation Control Register */ + TCR2_EL1, /* Extended Translation Control Register */ ESR_EL1, /* Exception Syndrome Register */ AFSR0_EL1, /* Auxiliary Fault Status Register 0 */ AFSR1_EL1, /* Auxiliary Fault Status Register 1 */ @@ -339,6 +340,10 @@ enum vcpu_sysreg { TFSR_EL1, /* Tag Fault Status Register (EL1) */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + /* Permission Indirection Extension registers */ + PIR_EL1, /* Permission Indirection Register 1 (EL1) */ + PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */ + /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -699,6 +704,8 @@ struct kvm_vcpu_arch { #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) /* Software step state is Active-pending */ #define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) +/* PMUSERENR for the guest EL0 is on physical CPU */ +#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6)) /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -1031,7 +1038,7 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); #define kvm_vcpu_os_lock_enabled(vcpu) \ - (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK)) + (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK)) int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); @@ -1065,9 +1072,14 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); +bool kvm_set_pmuserenr(u64 val); #else static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} +static inline bool kvm_set_pmuserenr(u64 val) +{ + return false; +} #endif void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 4cd6762bda80..93bd0975b15f 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx { kvm_pte_t old; void *arg; struct kvm_pgtable_mm_ops *mm_ops; + u64 start; u64 addr; u64 end; u32 level; @@ -631,9 +632,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); * * The walker will walk the page-table entries corresponding to the input * address range specified, visiting entries according to the walker flags. - * Invalid entries are treated as leaf entries. Leaf entries are reloaded - * after invoking the walker callback, allowing the walker to descend into - * a newly installed table. + * Invalid entries are treated as leaf entries. The visited page table entry is + * reloaded after invoking the walker callback, allowing the walker to descend + * into a newly installed table. * * Returning a negative error code from the walker callback function will * terminate the walk immediately with the same error code. diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index f99d74826a7e..cbbcdc35c4cd 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -18,7 +18,7 @@ static __always_inline bool system_uses_lse_atomics(void) { - return alternative_has_feature_likely(ARM64_HAS_LSE_ATOMICS); + return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS); } #define __lse_ll_sc_body(op, ...) \ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c735afdf639b..6e0e5722f229 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -46,7 +46,7 @@ #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) -#define MODULES_VSIZE (SZ_128M) +#define MODULES_VSIZE (SZ_2G) #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT))) #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) #define PCI_IO_END (VMEMMAP_START - SZ_8M) @@ -204,15 +204,17 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } +#ifdef CONFIG_RANDOMIZE_BASE +void kaslr_init(void); static inline bool kaslr_enabled(void) { - /* - * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical - * placement of the image rather than from the seed, so a displacement - * of less than MIN_KIMG_ALIGN means that no seed was provided. - */ - return kaslr_offset() >= MIN_KIMG_ALIGN; + extern bool __kaslr_is_enabled; + return __kaslr_is_enabled; } +#else +static inline void kaslr_init(void) { } +static inline bool kaslr_enabled(void) { return false; } +#endif /* * Allow all memory at the discovery stage. We will clip it later. diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 56911691bef0..a6fb325424e7 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -39,11 +39,16 @@ static inline void contextidr_thread_switch(struct task_struct *next) /* * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0. */ -static inline void cpu_set_reserved_ttbr0(void) +static inline void cpu_set_reserved_ttbr0_nosync(void) { unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); write_sysreg(ttbr, ttbr0_el1); +} + +static inline void cpu_set_reserved_ttbr0(void) +{ + cpu_set_reserved_ttbr0_nosync(); isb(); } @@ -52,7 +57,6 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) { BUG_ON(pgd == swapper_pg_dir); - cpu_set_reserved_ttbr0(); cpu_do_switch_mm(virt_to_phys(pgd),mm); } @@ -164,7 +168,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) * up (i.e. cpufeature framework is not up yet) and * latter only when we enable CNP via cpufeature's * enable() callback. - * Also we rely on the cpu_hwcap bit being set before + * Also we rely on the system_cpucaps bit being set before * calling the enable() function. */ ttbr1 |= TTBR_CNP_BIT; diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 18734fed3bdd..bfa6638b4c93 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -7,7 +7,6 @@ #include <asm-generic/module.h> -#ifdef CONFIG_ARM64_MODULE_PLTS struct mod_plt_sec { int plt_shndx; int plt_num_entries; @@ -21,7 +20,6 @@ struct mod_arch_specific { /* for CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; }; -#endif u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, void *loc, const Elf64_Rela *rela, @@ -30,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, void *loc, u64 val); -#ifdef CONFIG_RANDOMIZE_BASE -extern u64 module_alloc_base; -#else -#define module_alloc_base ((u64)_etext - MODULES_VSIZE) -#endif - struct plt_entry { /* * A program that conforms to the AArch64 Procedure Call Standard diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index dbba4b7559aa..b9ae8349e35d 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,9 +1,7 @@ SECTIONS { -#ifdef CONFIG_ARM64_MODULE_PLTS .plt 0 : { BYTE(0) } .init.plt 0 : { BYTE(0) } .text.ftrace_trampoline 0 : { BYTE(0) } -#endif #ifdef CONFIG_KASAN_SW_TAGS /* diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index b9ba19dbdb69..9abcc8ef3087 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -140,17 +140,11 @@ PERCPU_RET_OP(add, add, ldadd) * re-enabling preemption for preemptible kernels, but doing that in a way * which builds inside a module would mean messing directly with the preempt * count. If you do this, peterz and tglx will hunt you down. + * + * Not to mention it'll break the actual preemption model for missing a + * preemption point when TIF_NEED_RESCHED gets set while preemption is + * disabled. */ -#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ -({ \ - int __ret; \ - preempt_disable_notrace(); \ - __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ - raw_cpu_ptr(&(ptr2)), \ - o1, o2, n1, n2); \ - preempt_enable_notrace(); \ - __ret; \ -}) #define _pcp_protect(op, pcp, ...) \ ({ \ @@ -240,6 +234,22 @@ PERCPU_RET_OP(add, add, ldadd) #define this_cpu_cmpxchg_8(pcp, o, n) \ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n) + +#define this_cpu_cmpxchg128(pcp, o, n) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + u128 old__, new__, ret__; \ + pcp_op_T__ *ptr__; \ + old__ = o; \ + new__ = n; \ + preempt_disable_notrace(); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ + ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \ + preempt_enable_notrace(); \ + ret__; \ +}) + #ifdef __KVM_NVHE_HYPERVISOR__ extern unsigned long __hyp_per_cpu_offset(unsigned int cpu); #define __per_cpu_offset diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f658aafc47df..e4944d517c99 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -171,6 +171,14 @@ #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) /* + * PIIndex[3:0] encoding (Permission Indirection Extension) + */ +#define PTE_PI_IDX_0 6 /* AP[1], USER */ +#define PTE_PI_IDX_1 51 /* DBM */ +#define PTE_PI_IDX_2 53 /* PXN */ +#define PTE_PI_IDX_3 54 /* UXN */ + +/* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9b165117a454..eed814b00a38 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -27,6 +27,40 @@ */ #define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ +#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) +#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) + +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) + +#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PTE_WRITE | PMD_ATTRINDX(MT_NORMAL)) +#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) + +#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) + +#define _PAGE_KERNEL (PROT_NORMAL) +#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) +#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) +#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN) +#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) + +#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) +#define _PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) + +#ifdef __ASSEMBLY__ +#define PTE_MAYBE_NG 0 +#endif + #ifndef __ASSEMBLY__ #include <asm/cpufeature.h> @@ -34,9 +68,6 @@ extern bool arm64_use_ng_mappings; -#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) - #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) @@ -50,26 +81,11 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_GP 0 #endif -#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) -#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) - -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) -#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) - -#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) -#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) - -#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) - -#define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) -#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) -#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) +#define PAGE_KERNEL __pgprot(_PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_ROX) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT) #define PAGE_S2_MEMATTR(attr, has_fwb) \ ({ \ @@ -83,12 +99,62 @@ extern bool arm64_use_ng_mappings; #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) -#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_SHARED __pgprot(_PAGE_SHARED) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_READONLY) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC) +#define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY) #endif /* __ASSEMBLY__ */ +#define pte_pi_index(pte) ( \ + ((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \ + ((pte & BIT(PTE_PI_IDX_2)) >> (PTE_PI_IDX_2 - 2)) | \ + ((pte & BIT(PTE_PI_IDX_1)) >> (PTE_PI_IDX_1 - 1)) | \ + ((pte & BIT(PTE_PI_IDX_0)) >> (PTE_PI_IDX_0 - 0))) + +/* + * Page types used via Permission Indirection Extension (PIE). PIE uses + * the USER, DBM, PXN and UXN bits to to generate an index which is used + * to look up the actual permission in PIR_ELx and PIRE0_EL1. We define + * combinations we use on non-PIE systems with the same encoding, for + * convenience these are listed here as comments as are the unallocated + * encodings. + */ + +/* 0: PAGE_DEFAULT */ +/* 1: PTE_USER */ +/* 2: PTE_WRITE */ +/* 3: PTE_WRITE | PTE_USER */ +/* 4: PAGE_EXECONLY PTE_PXN */ +/* 5: PAGE_READONLY_EXEC PTE_PXN | PTE_USER */ +/* 6: PTE_PXN | PTE_WRITE */ +/* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */ +/* 8: PAGE_KERNEL_ROX PTE_UXN */ +/* 9: PTE_UXN | PTE_USER */ +/* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */ +/* b: PTE_UXN | PTE_WRITE | PTE_USER */ +/* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */ +/* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */ +/* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */ +/* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */ + +#define PIE_E0 ( \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW)) + +#define PIE_E1 ( \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL), PIE_RW)) + #endif /* __ASM_PGTABLE_PROT_H */ diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index 13df982a0808..3fdae5fe3142 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -73,6 +73,7 @@ static inline void dynamic_scs_init(void) {} #endif int scs_patch(const u8 eh_frame[], int size); +asmlinkage void scs_patch_vmlinux(void); #endif /* __ASSEMBLY __ */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index f2d26235bfb4..9b31e6d0da17 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -99,7 +99,7 @@ static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) extern int __cpu_disable(void); -extern void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } extern void __noreturn cpu_die(void); extern void __noreturn cpu_die_early(void); diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index db7b371b367c..9cc501450486 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -100,5 +100,21 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int sco u8 spectre_bhb_loop_affected(int scope); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); + +void spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_bhb_patch_wa3(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_bhb_patch_clearbhb(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index d30217c21eff..17f687510c48 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -38,6 +38,7 @@ asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL_COMPAT(name) \ + asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs); \ asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ @@ -53,6 +54,7 @@ ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \ asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \ { \ return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \ @@ -73,11 +75,13 @@ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL(name) \ + asmlinkage long __arm64_sys_##name(const struct pt_regs *regs); \ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } +asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused); #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e72d9aaab6b1..7a1e62631814 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -115,8 +115,14 @@ #define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) +#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4) +#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) +#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4) +#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) +#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4) +#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6) /* * Automatically generated definitions for system registers, the @@ -134,25 +140,17 @@ #define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) #define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) -#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) -#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) -#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) -#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2) -#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2) #define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4) #define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5) #define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) #define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) #define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) -#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) -#define SYS_OSLAR_OSLK BIT(0) - #define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) -#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0)) -#define SYS_OSLSR_OSLM_NI 0 -#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3) -#define SYS_OSLSR_OSLK BIT(1) +#define OSLSR_EL1_OSLM_MASK (BIT(3) | BIT(0)) +#define OSLSR_EL1_OSLM_NI 0 +#define OSLSR_EL1_OSLM_IMPLEMENTED BIT(3) +#define OSLSR_EL1_OSLK BIT(1) #define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) #define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) @@ -235,54 +233,8 @@ /*** End of Statistical Profiling Extension ***/ -/* - * TRBE Registers - */ -#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0) -#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1) -#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2) -#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3) -#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4) -#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6) -#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7) - -#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0) -#define TRBLIMITR_LIMIT_SHIFT 12 -#define TRBLIMITR_NVM BIT(5) -#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0) -#define TRBLIMITR_TRIG_MODE_SHIFT 3 -#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0) -#define TRBLIMITR_FILL_MODE_SHIFT 1 -#define TRBLIMITR_ENABLE BIT(0) -#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0) -#define TRBPTR_PTR_SHIFT 0 -#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0) -#define TRBBASER_BASE_SHIFT 12 -#define TRBSR_EC_MASK GENMASK(5, 0) -#define TRBSR_EC_SHIFT 26 -#define TRBSR_IRQ BIT(22) -#define TRBSR_TRG BIT(21) -#define TRBSR_WRAP BIT(20) -#define TRBSR_ABORT BIT(18) -#define TRBSR_STOP BIT(17) -#define TRBSR_MSS_MASK GENMASK(15, 0) -#define TRBSR_MSS_SHIFT 0 -#define TRBSR_BSC_MASK GENMASK(5, 0) -#define TRBSR_BSC_SHIFT 0 -#define TRBSR_FSC_MASK GENMASK(5, 0) -#define TRBSR_FSC_SHIFT 0 -#define TRBMAR_SHARE_MASK GENMASK(1, 0) -#define TRBMAR_SHARE_SHIFT 8 -#define TRBMAR_OUTER_MASK GENMASK(3, 0) -#define TRBMAR_OUTER_SHIFT 4 -#define TRBMAR_INNER_MASK GENMASK(3, 0) -#define TRBMAR_INNER_SHIFT 0 -#define TRBTRG_TRG_MASK GENMASK(31, 0) -#define TRBTRG_TRG_SHIFT 0 -#define TRBIDR_FLAG BIT(5) -#define TRBIDR_PROG BIT(4) -#define TRBIDR_ALIGN_MASK GENMASK(3, 0) -#define TRBIDR_ALIGN_SHIFT 0 +#define TRBSR_EL1_BSC_MASK GENMASK(5, 0) +#define TRBSR_EL1_BSC_SHIFT 0 #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -758,6 +710,25 @@ #define ICH_VTR_TDS_SHIFT 19 #define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) +/* + * Permission Indirection Extension (PIE) permission encodings. + * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). + */ +#define PIE_NONE_O 0x0 +#define PIE_R_O 0x1 +#define PIE_X_O 0x2 +#define PIE_RX_O 0x3 +#define PIE_RW_O 0x5 +#define PIE_RWnX_O 0x6 +#define PIE_RWX_O 0x7 +#define PIE_R 0x8 +#define PIE_GCS 0x9 +#define PIE_RX 0xa +#define PIE_RW 0xc +#define PIE_RWX 0xe + +#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 1f361e2da516..d66dfb3a72dd 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -29,6 +29,8 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *s void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); +int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); + /* * Move regs->pc to next instruction and do necessary setup before it * is executed. diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 05f4fc265428..14be5000c5a0 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -65,7 +65,6 @@ static inline void __uaccess_ttbr0_disable(void) ttbr &= ~TTBR_ASID_MASK; /* reserved_pg_dir placed before swapper_pg_dir */ write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1); - isb(); /* Set reserved ASID */ write_sysreg(ttbr, ttbr1_el1); isb(); @@ -89,7 +88,6 @@ static inline void __uaccess_ttbr0_enable(void) ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */ ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); - isb(); /* Restore user page table */ write_sysreg(ttbr0, ttbr0_el1); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 69a4fb749c65..a2cac4305b1e 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -102,5 +102,6 @@ #define HWCAP2_SME_BI32I32 (1UL << 40) #define HWCAP2_SME_B16B16 (1UL << 41) #define HWCAP2_SME_F16F16 (1UL << 42) +#define HWCAP2_MOPS (1UL << 43) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7c2bb4e72476..3864a64e2b2b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -42,8 +42,7 @@ obj-$(CONFIG_COMPAT) += sigreturn32.o obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o -obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o +obj-$(CONFIG_MODULES) += module.o module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d32d4ed5519b..8ff6610af496 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -24,8 +24,8 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -#define ALT_CAP(a) ((a)->cpufeature & ~ARM64_CB_BIT) -#define ALT_HAS_CB(a) ((a)->cpufeature & ARM64_CB_BIT) +#define ALT_CAP(a) ((a)->cpucap & ~ARM64_CB_BIT) +#define ALT_HAS_CB(a) ((a)->cpucap & ARM64_CB_BIT) /* Volatile, as we may be patching the guts of READ_ONCE() */ static volatile int all_alternatives_applied; @@ -37,12 +37,12 @@ struct alt_region { struct alt_instr *end; }; -bool alternative_is_applied(u16 cpufeature) +bool alternative_is_applied(u16 cpucap) { - if (WARN_ON(cpufeature >= ARM64_NCAPS)) + if (WARN_ON(cpucap >= ARM64_NCAPS)) return false; - return test_bit(cpufeature, applied_alternatives); + return test_bit(cpucap, applied_alternatives); } /* @@ -121,11 +121,11 @@ static noinstr void patch_alternative(struct alt_instr *alt, * accidentally call into the cache.S code, which is patched by us at * runtime. */ -static void clean_dcache_range_nopatch(u64 start, u64 end) +static noinstr void clean_dcache_range_nopatch(u64 start, u64 end) { u64 cur, d_size, ctr_el0; - ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); + ctr_el0 = arm64_ftr_reg_ctrel0.sys_val; d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0, CTR_EL0_DminLine_SHIFT); cur = start & ~(d_size - 1); @@ -141,7 +141,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) static void __apply_alternatives(const struct alt_region *region, bool is_module, - unsigned long *feature_mask) + unsigned long *cpucap_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -151,7 +151,7 @@ static void __apply_alternatives(const struct alt_region *region, int nr_inst; int cap = ALT_CAP(alt); - if (!test_bit(cap, feature_mask)) + if (!test_bit(cap, cpucap_mask)) continue; if (!cpus_have_cap(cap)) @@ -188,11 +188,10 @@ static void __apply_alternatives(const struct alt_region *region, icache_inval_all_pou(); isb(); - /* Ignore ARM64_CB bit from feature mask */ bitmap_or(applied_alternatives, applied_alternatives, - feature_mask, ARM64_NCAPS); + cpucap_mask, ARM64_NCAPS); bitmap_and(applied_alternatives, applied_alternatives, - cpu_hwcaps, ARM64_NCAPS); + system_cpucaps, ARM64_NCAPS); } } @@ -239,7 +238,7 @@ static int __init __apply_alternatives_multi_stop(void *unused) } else { DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS); - bitmap_complement(remaining_capabilities, boot_capabilities, + bitmap_complement(remaining_capabilities, boot_cpucaps, ARM64_NCAPS); BUG_ON(all_alternatives_applied); @@ -274,7 +273,7 @@ void __init apply_boot_alternatives(void) pr_info("applying boot alternatives\n"); __apply_alternatives(&kernel_alternatives, false, - &boot_capabilities[0]); + &boot_cpucaps[0]); } #ifdef CONFIG_MODULES diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7d7128c65161..6ea7f23b1287 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -105,11 +105,11 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif -DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); -EXPORT_SYMBOL(cpu_hwcaps); -static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS]; +DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); +EXPORT_SYMBOL(system_cpucaps); +static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NCAPS]; -DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); +DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); @@ -137,7 +137,7 @@ static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly; void dump_cpu_features(void) { /* file-wide pr_fmt adds "CPU features: " prefix */ - pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); + pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps); } #define ARM64_CPUID_FIELDS(reg, field, min_value) \ @@ -223,6 +223,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), @@ -364,6 +365,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0), @@ -396,6 +398,12 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1), @@ -722,6 +730,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1, &id_aa64mmfr1_override), ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), + ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3), /* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), @@ -954,24 +963,24 @@ extern const struct arm64_cpu_capabilities arm64_errata[]; static const struct arm64_cpu_capabilities arm64_features[]; static void __init -init_cpu_hwcaps_indirect_list_from_array(const struct arm64_cpu_capabilities *caps) +init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) { if (WARN(caps->capability >= ARM64_NCAPS, "Invalid capability %d\n", caps->capability)) continue; - if (WARN(cpu_hwcaps_ptrs[caps->capability], + if (WARN(cpucap_ptrs[caps->capability], "Duplicate entry for capability %d\n", caps->capability)) continue; - cpu_hwcaps_ptrs[caps->capability] = caps; + cpucap_ptrs[caps->capability] = caps; } } -static void __init init_cpu_hwcaps_indirect_list(void) +static void __init init_cpucap_indirect_list(void) { - init_cpu_hwcaps_indirect_list_from_array(arm64_features); - init_cpu_hwcaps_indirect_list_from_array(arm64_errata); + init_cpucap_indirect_list_from_array(arm64_features); + init_cpucap_indirect_list_from_array(arm64_errata); } static void __init setup_boot_cpu_capabilities(void); @@ -1017,6 +1026,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); + init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); @@ -1049,10 +1059,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); /* - * Initialize the indirect array of CPU hwcaps capabilities pointers - * before we handle the boot CPU below. + * Initialize the indirect array of CPU capabilities pointers before we + * handle the boot CPU below. */ - init_cpu_hwcaps_indirect_list(); + init_cpucap_indirect_list(); /* * Detect and enable early CPU capabilities based on the boot CPU, @@ -1262,6 +1272,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu, + info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3); taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -1391,6 +1403,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64MMFR0_EL1); read_sysreg_case(SYS_ID_AA64MMFR1_EL1); read_sysreg_case(SYS_ID_AA64MMFR2_EL1); + read_sysreg_case(SYS_ID_AA64MMFR3_EL1); read_sysreg_case(SYS_ID_AA64ISAR0_EL1); read_sysreg_case(SYS_ID_AA64ISAR1_EL1); read_sysreg_case(SYS_ID_AA64ISAR2_EL1); @@ -2048,9 +2061,9 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, int scope) { - bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); - bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); - bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); + bool api = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool apa = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + bool apa3 = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); return apa || apa3 || api; } @@ -2186,6 +2199,11 @@ static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) set_pstate_dit(1); } +static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); +} + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2235,11 +2253,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_ECV_CNTPOFF, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64MMFR0_EL1, - .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF, + ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF) }, #ifdef CONFIG_ARM64_PAN { @@ -2309,6 +2323,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = is_kvm_protected_mode, }, + { + .desc = "HCRX_EL2 register", + .capability = ARM64_HAS_HCX, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HCX, IMP) + }, #endif { .desc = "Kernel page table isolation (KPTI)", @@ -2641,6 +2662,27 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_dit, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP) }, + { + .desc = "Memory Copy and Memory Set instructions", + .capability = ARM64_HAS_MOPS, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_mops, + ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, MOPS, IMP) + }, + { + .capability = ARM64_HAS_TCR2, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, TCRX, IMP) + }, + { + .desc = "Stage-1 Permission Indirection Extension (S1PIE)", + .capability = ARM64_HAS_S1PIE, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1PIE, IMP) + }, {}, }; @@ -2769,6 +2811,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM), HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), + HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), @@ -2895,7 +2938,7 @@ static void update_cpu_capabilities(u16 scope_mask) scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask) || cpus_have_cap(caps->capability) || !caps->matches(caps, cpucap_default_scope(caps))) @@ -2903,10 +2946,11 @@ static void update_cpu_capabilities(u16 scope_mask) if (caps->desc) pr_info("detected: %s\n", caps->desc); - cpus_set_cap(caps->capability); + + __set_bit(caps->capability, system_cpucaps); if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) - set_bit(caps->capability, boot_capabilities); + set_bit(caps->capability, boot_cpucaps); } } @@ -2920,7 +2964,7 @@ static int cpu_enable_non_boot_scope_capabilities(void *__unused) u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU; for_each_available_cap(i) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[i]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[i]; if (WARN_ON(!cap)) continue; @@ -2950,7 +2994,7 @@ static void __init enable_cpu_capabilities(u16 scope_mask) for (i = 0; i < ARM64_NCAPS; i++) { unsigned int num; - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask)) continue; num = caps->capability; @@ -2995,7 +3039,7 @@ static void verify_local_cpu_caps(u16 scope_mask) scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask)) continue; @@ -3194,7 +3238,7 @@ static void __init setup_boot_cpu_capabilities(void) bool this_cpu_has_cap(unsigned int n) { if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n]; if (cap) return cap->matches(cap, SCOPE_LOCAL_CPU); @@ -3207,13 +3251,13 @@ EXPORT_SYMBOL_GPL(this_cpu_has_cap); /* * This helper function is used in a narrow window when, * - The system wide safe registers are set with all the SMP CPUs and, - * - The SYSTEM_FEATURE cpu_hwcaps may not have been set. + * - The SYSTEM_FEATURE system_cpucaps may not have been set. * In all other cases cpus_have_{const_}cap() should be used. */ static bool __maybe_unused __system_matches_cap(unsigned int n) { if (n < ARM64_NCAPS) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n]; if (cap) return cap->matches(cap, SCOPE_SYSTEM); diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 42e19fff40ee..d1f68599c29f 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -13,7 +13,7 @@ #include <linux/of_device.h> #include <linux/psci.h> -#ifdef CONFIG_ACPI +#ifdef CONFIG_ACPI_PROCESSOR_IDLE #include <acpi/processor.h> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index eb4378c23b3c..58622dc85917 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -125,6 +125,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32", [KERNEL_HWCAP_SME_B16B16] = "smeb16b16", [KERNEL_HWCAP_SME_F16F16] = "smef16f16", + [KERNEL_HWCAP_MOPS] = "mops", }; #ifdef CONFIG_COMPAT @@ -446,6 +447,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); + info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1); info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3af3c01c93a6..6b2e0c367702 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void) lockdep_hardirqs_on(CALLER_ADDR0); } -static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs) +static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs) { unsigned long flags; @@ -135,11 +135,13 @@ static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs) flags = read_thread_flags(); if (unlikely(flags & _TIF_WORK_MASK)) do_notify_resume(regs, flags); + + lockdep_sys_exit(); } static __always_inline void exit_to_user_mode(struct pt_regs *regs) { - prepare_exit_to_user_mode(regs); + exit_to_user_mode_prepare(regs); mte_check_tfsr_exit(); __exit_to_user_mode(); } @@ -611,6 +613,14 @@ static void noinstr el0_bti(struct pt_regs *regs) exit_to_user_mode(regs); } +static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_el0_mops(regs, esr); + exit_to_user_mode(regs); +} + static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); @@ -688,6 +698,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BTI: el0_bti(regs); break; + case ESR_ELx_EC_MOPS: + el0_mops(regs, esr); + break; case ESR_ELx_EC_BREAKPT_LOW: case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ab2a6e33c052..a40e5e50fa55 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -101,12 +101,11 @@ .org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm - .macro tramp_alias, dst, sym, tmp - mov_q \dst, TRAMP_VALIAS - adr_l \tmp, \sym - add \dst, \dst, \tmp - adr_l \tmp, .entry.tramp.text - sub \dst, \dst, \tmp + .macro tramp_alias, dst, sym + .set .Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text + movz \dst, :abs_g2_s:.Lalias\@ + movk \dst, :abs_g1_nc:.Lalias\@ + movk \dst, :abs_g0_nc:.Lalias\@ .endm /* @@ -435,13 +434,14 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 eret alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 4f msr far_el1, x29 - tramp_alias x30, tramp_exit_native, x29 - br x30 -4: - tramp_alias x30, tramp_exit_compat, x29 - br x30 + + ldr_this_cpu x30, this_cpu_vector, x29 + tramp_alias x29, tramp_exit + msr vbar_el1, x30 // install vector table + ldr lr, [sp, #S_LR] // restore x30 + add sp, sp, #PT_REGS_SIZE // restore sp + br x29 #endif .else ldr lr, [sp, #S_LR] @@ -732,22 +732,6 @@ alternative_else_nop_endif .org 1b + 128 // Did we overflow the ventry slot? .endm - .macro tramp_exit, regsize = 64 - tramp_data_read_var x30, this_cpu_vector - get_this_cpu_offset x29 - ldr x30, [x30, x29] - - msr vbar_el1, x30 - ldr lr, [sp, #S_LR] - tramp_unmap_kernel x29 - .if \regsize == 64 - mrs x29, far_el1 - .endif - add sp, sp, #PT_REGS_SIZE // restore sp - eret - sb - .endm - .macro generate_tramp_vector, kpti, bhb .Lvector_start\@: .space 0x400 @@ -768,7 +752,7 @@ alternative_else_nop_endif */ .pushsection ".entry.tramp.text", "ax" .align 11 -SYM_CODE_START_NOALIGN(tramp_vectors) +SYM_CODE_START_LOCAL_NOALIGN(tramp_vectors) #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW @@ -777,13 +761,12 @@ SYM_CODE_START_NOALIGN(tramp_vectors) generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE SYM_CODE_END(tramp_vectors) -SYM_CODE_START(tramp_exit_native) - tramp_exit -SYM_CODE_END(tramp_exit_native) - -SYM_CODE_START(tramp_exit_compat) - tramp_exit 32 -SYM_CODE_END(tramp_exit_compat) +SYM_CODE_START_LOCAL(tramp_exit) + tramp_unmap_kernel x29 + mrs x29, far_el1 // restore x29 + eret + sb +SYM_CODE_END(tramp_exit) .popsection // .entry.tramp.text #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ @@ -1077,7 +1060,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline br x5 #endif SYM_CODE_END(__sdei_asm_handler) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 2fbafa5cc7ac..7a1aeb95d7c3 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1649,6 +1649,7 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SME); current->thread.svcr = 0; + sme_smstop(); } current->thread.fp_type = FP_STATE_FPSIMD; diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 432626c866a8..a650f5e11fc5 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -197,7 +197,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) static struct plt_entry *get_ftrace_plt(struct module *mod) { -#ifdef CONFIG_ARM64_MODULE_PLTS +#ifdef CONFIG_MODULES struct plt_entry *plt = mod->arch.ftrace_trampolines; return &plt[FTRACE_PLT_IDX]; @@ -249,7 +249,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, * must use a PLT to reach it. We can only place PLTs for modules, and * only when module PLT support is built-in. */ - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + if (!IS_ENABLED(CONFIG_MODULES)) return false; /* @@ -431,10 +431,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * * Note: 'mod' is only set at module load time. */ - if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && - IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) { + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && mod) return aarch64_insn_patch_text_nosync((void *)pc, new); - } if (!ftrace_find_callable_addr(rec, mod, &addr)) return -EINVAL; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index e92caebff46a..0f5a30f109d9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -382,7 +382,7 @@ SYM_FUNC_START_LOCAL(create_idmap) adrp x0, init_idmap_pg_dir adrp x3, _text adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE - mov x7, SWAPPER_RX_MMUFLAGS + mov_q x7, SWAPPER_RX_MMUFLAGS map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT @@ -391,7 +391,7 @@ SYM_FUNC_START_LOCAL(create_idmap) adrp x2, init_pg_dir adrp x3, init_pg_end bic x4, x2, #SWAPPER_BLOCK_SIZE - 1 - mov x5, SWAPPER_RW_MMUFLAGS + mov_q x5, SWAPPER_RW_MMUFLAGS mov x6, #SWAPPER_BLOCK_SHIFT bl remap_region @@ -402,7 +402,7 @@ SYM_FUNC_START_LOCAL(create_idmap) bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE bic x4, x21, #SWAPPER_BLOCK_SIZE - 1 - mov x5, SWAPPER_RW_MMUFLAGS + mov_q x5, SWAPPER_RW_MMUFLAGS mov x6, #SWAPPER_BLOCK_SHIFT bl remap_region @@ -430,7 +430,7 @@ SYM_FUNC_START_LOCAL(create_kernel_mapping) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text add x6, x6, x5 // runtime __va(_end) - mov x7, SWAPPER_RW_MMUFLAGS + mov_q x7, SWAPPER_RW_MMUFLAGS map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 788597a6b6a2..02870beb271e 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -99,7 +99,6 @@ int pfn_is_nosave(unsigned long pfn) void notrace save_processor_state(void) { - WARN_ON(num_online_cpus() != 1); } void notrace restore_processor_state(void) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index b29a311bb055..db2a1861bb97 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -973,14 +973,6 @@ static int hw_breakpoint_reset(unsigned int cpu) return 0; } -#ifdef CONFIG_CPU_PM -extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); -#else -static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) -{ -} -#endif - /* * One-time initialisation. */ diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 9439240c3fcf..d63de1973ddb 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -119,6 +119,24 @@ SYM_CODE_START_LOCAL(__finalise_el2) msr ttbr1_el1, x0 mrs_s x0, SYS_MAIR_EL12 msr mair_el1, x0 + mrs x1, REG_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4 + cbz x1, .Lskip_tcr2 + mrs x0, REG_TCR2_EL12 + msr REG_TCR2_EL1, x0 + + // Transfer permission indirection state + mrs x1, REG_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lskip_indirection + mrs x0, REG_PIRE0_EL12 + msr REG_PIRE0_EL1, x0 + mrs x0, REG_PIR_EL12 + msr REG_PIR_EL1, x0 + +.Lskip_indirection: +.Lskip_tcr2: + isb // Hack the exception return to stay at EL2 diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 370ab84fd06e..8439248c21d3 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -123,6 +123,7 @@ static const struct ftr_set_desc isar2 __initconst = { .fields = { FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL), FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL), + FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL), {} }, }; @@ -174,6 +175,7 @@ static const struct { "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " "id_aa64isar1.api=0 id_aa64isar1.apa=0 " "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" }, + { "arm64.nomops", "id_aa64isar2.mops=0" }, { "arm64.nomte", "id_aa64pfr1.mte=0" }, { "nokaslr", "kaslr.disabled=1" }, }; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index e7477f21a4c9..17f96a19781d 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -4,90 +4,35 @@ */ #include <linux/cache.h> -#include <linux/crc32.h> #include <linux/init.h> -#include <linux/libfdt.h> -#include <linux/mm_types.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/pgtable.h> -#include <linux/random.h> +#include <linux/printk.h> -#include <asm/fixmap.h> -#include <asm/kernel-pgtable.h> +#include <asm/cpufeature.h> #include <asm/memory.h> -#include <asm/mmu.h> -#include <asm/sections.h> -#include <asm/setup.h> -u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; struct arm64_ftr_override kaslr_feature_override __initdata; -static int __init kaslr_init(void) -{ - u64 module_range; - u32 seed; - - /* - * Set a reasonable default for module_alloc_base in case - * we end up running with module randomization disabled. - */ - module_alloc_base = (u64)_etext - MODULES_VSIZE; +bool __ro_after_init __kaslr_is_enabled = false; +void __init kaslr_init(void) +{ if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) { pr_info("KASLR disabled on command line\n"); - return 0; - } - - if (!kaslr_enabled()) { - pr_warn("KASLR disabled due to lack of seed\n"); - return 0; + return; } - pr_info("KASLR enabled\n"); - /* - * KASAN without KASAN_VMALLOC does not expect the module region to - * intersect the vmalloc region, since shadow memory is allocated for - * each module at load time, whereas the vmalloc region will already be - * shadowed by KASAN zero pages. + * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical + * placement of the image rather than from the seed, so a displacement + * of less than MIN_KIMG_ALIGN means that no seed was provided. */ - BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) && - !IS_ENABLED(CONFIG_KASAN_VMALLOC)); - - seed = get_random_u32(); - - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - /* - * Randomize the module region over a 2 GB window covering the - * kernel. This reduces the risk of modules leaking information - * about the address of the kernel itself, but results in - * branches between modules and the core kernel that are - * resolved via PLTs. (Branches between modules will be - * resolved normally.) - */ - module_range = SZ_2G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); - } else { - /* - * Randomize the module region by setting module_alloc_base to - * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, - * _stext) . This guarantees that the resulting region still - * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers unless this region is exhausted - * and we fall back to a larger 2GB window in module_alloc() - * when ARM64_MODULE_PLTS is enabled. - */ - module_range = MODULES_VSIZE - (u64)(_etext - _stext); + if (kaslr_offset() < MIN_KIMG_ALIGN) { + pr_warn("KASLR disabled due to lack of seed\n"); + return; } - /* use the lower 21 bits to randomize the base of the module region */ - module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; - module_alloc_base &= PAGE_MASK; - - return 0; + pr_info("KASLR enabled\n"); + __kaslr_is_enabled = true; } -subsys_initcall(kaslr_init) diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 692e9d2e31e5..af046ceac22d 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -10,7 +10,7 @@ * aarch32_setup_additional_pages() and are provided for compatibility * reasons with 32 bit (aarch32) applications that need them. * - * See Documentation/arm/kernel_user_helpers.rst for formal definitions. + * See Documentation/arch/arm/kernel_user_helpers.rst for formal definitions. */ #include <asm/unistd.h> diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 543493bf924d..ad02058756b5 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -7,6 +7,7 @@ #include <linux/ftrace.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/moduleloader.h> #include <linux/sort.h> static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc, diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 5af4975caeb5..dd851297596e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -7,6 +7,8 @@ * Author: Will Deacon <will.deacon@arm.com> */ +#define pr_fmt(fmt) "Modules: " fmt + #include <linux/bitops.h> #include <linux/elf.h> #include <linux/ftrace.h> @@ -15,52 +17,131 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> +#include <linux/random.h> #include <linux/scs.h> #include <linux/vmalloc.h> + #include <asm/alternative.h> #include <asm/insn.h> #include <asm/scs.h> #include <asm/sections.h> +static u64 module_direct_base __ro_after_init = 0; +static u64 module_plt_base __ro_after_init = 0; + +/* + * Choose a random page-aligned base address for a window of 'size' bytes which + * entirely contains the interval [start, end - 1]. + */ +static u64 __init random_bounding_box(u64 size, u64 start, u64 end) +{ + u64 max_pgoff, pgoff; + + if ((end - start) >= size) + return 0; + + max_pgoff = (size - (end - start)) / PAGE_SIZE; + pgoff = get_random_u32_inclusive(0, max_pgoff); + + return start - pgoff * PAGE_SIZE; +} + +/* + * Modules may directly reference data and text anywhere within the kernel + * image and other modules. References using PREL32 relocations have a +/-2G + * range, and so we need to ensure that the entire kernel image and all modules + * fall within a 2G window such that these are always within range. + * + * Modules may directly branch to functions and code within the kernel text, + * and to functions and code within other modules. These branches will use + * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure + * that the entire kernel text and all module text falls within a 128M window + * such that these are always within range. With PLTs, we can expand this to a + * 2G window. + * + * We chose the 128M region to surround the entire kernel image (rather than + * just the text) as using the same bounds for the 128M and 2G regions ensures + * by construction that we never select a 128M region that is not a subset of + * the 2G region. For very large and unusual kernel configurations this means + * we may fall back to PLTs where they could have been avoided, but this keeps + * the logic significantly simpler. + */ +static int __init module_init_limits(void) +{ + u64 kernel_end = (u64)_end; + u64 kernel_start = (u64)_text; + u64 kernel_size = kernel_end - kernel_start; + + /* + * The default modules region is placed immediately below the kernel + * image, and is large enough to use the full 2G relocation range. + */ + BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); + BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); + + if (!kaslr_enabled()) { + if (kernel_size < SZ_128M) + module_direct_base = kernel_end - SZ_128M; + if (kernel_size < SZ_2G) + module_plt_base = kernel_end - SZ_2G; + } else { + u64 min = kernel_start; + u64 max = kernel_end; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); + } else { + module_direct_base = random_bounding_box(SZ_128M, min, max); + if (module_direct_base) { + min = module_direct_base; + max = module_direct_base + SZ_128M; + } + } + + module_plt_base = random_bounding_box(SZ_2G, min, max); + } + + pr_info("%llu pages in range for non-PLT usage", + module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); + pr_info("%llu pages in range for PLT usage", + module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); + + return 0; +} +subsys_initcall(module_init_limits); + void *module_alloc(unsigned long size) { - u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - /* Silence the initial allocation */ - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - gfp_mask |= __GFP_NOWARN; - - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - /* don't exceed the static module region - see below */ - module_alloc_end = MODULES_END; - - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, - NUMA_NO_NODE, __builtin_return_address(0)); - - if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - (IS_ENABLED(CONFIG_KASAN_VMALLOC) || - (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) - /* - * KASAN without KASAN_VMALLOC can only deal with module - * allocations being served from the reserved module region, - * since the remainder of the vmalloc region is already - * backed by zero shadow pages, and punching holes into it - * is non-trivial. Since the module region is not randomized - * when KASAN is enabled without KASAN_VMALLOC, it is even - * less likely that the module region gets exhausted, so we - * can simply omit this fallback in that case. - */ - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + void *p = NULL; + + /* + * Where possible, prefer to allocate within direct branch range of the + * kernel such that no PLTs are necessary. + */ + if (module_direct_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_direct_base, + module_direct_base + SZ_128M, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } - if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { + if (!p && module_plt_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_plt_base, + module_plt_base + SZ_2G, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } + + if (!p) { + pr_warn_ratelimited("%s: unable to allocate memory\n", + __func__); + } + + if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { vfree(p); return NULL; } @@ -448,9 +529,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); - - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - ovf == -ERANGE) { + if (ovf == -ERANGE) { val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym); if (!val) return -ENOEXEC; @@ -487,7 +566,7 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { -#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) +#if defined(CONFIG_DYNAMIC_FTRACE) const Elf_Shdr *s; struct plt_entry *plts; diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f5bcb0dc6267..7e89968bd282 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -66,13 +66,10 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) return; /* if PG_mte_tagged is set, tags have already been initialised */ - for (i = 0; i < nr_pages; i++, page++) { - if (!page_mte_tagged(page)) { + for (i = 0; i < nr_pages; i++, page++) + if (!page_mte_tagged(page)) mte_sync_page_tags(page, old_pte, check_swap, pte_is_tagged); - set_page_mte_tagged(page); - } - } /* ensure the tags are visible before the PTE is set */ smp_wmb(); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b8ec7b3ac9cb..417a8a86b2db 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -296,6 +296,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + kaslr_init(); + /* * If know now we are going to need KPTI then use non-global * mappings from the start, avoiding the cost of rewriting diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 2cfc810d0a5b..e304f7ebec2a 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -23,6 +23,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/elf.h> +#include <asm/exception.h> #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/unistd.h> @@ -398,7 +399,7 @@ static int restore_tpidr2_context(struct user_ctxs *user) __get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err); if (!err) - current->thread.tpidr2_el0 = tpidr2_el0; + write_sysreg_s(tpidr2_el0, SYS_TPIDR2_EL0); return err; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d00d4cbb31b1..edd63894d61e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -332,17 +332,13 @@ static int op_cpu_kill(unsigned int cpu) } /* - * called on the thread which is asking for a CPU to be shutdown - - * waits until shutdown has completed, or it is timed out. + * Called on the thread which is asking for a CPU to be shutdown after the + * shutdown completed. */ -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { int err; - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: cpu didn't die\n", cpu); - return; - } pr_debug("CPU%u: shutdown\n", cpu); /* @@ -369,8 +365,8 @@ void __noreturn cpu_die(void) local_daif_mask(); - /* Tell __cpu_die() that this CPU is now safe to dispose of */ - (void)cpu_report_death(); + /* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */ + cpuhp_ap_report_dead(); /* * Actually shutdown the CPU. This must never fail. The specific hotplug diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index da84cf855c44..5a668d7f3c1f 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -147,11 +147,9 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * exit regardless, as the old entry assembly did. */ if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { - local_daif_mask(); flags = read_thread_flags(); if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) return; - local_daif_restore(DAIF_PROCCTX); } trace_exit: diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4bb1b8f47298..794a2dd3659a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -514,6 +514,63 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr) die("Oops - FPAC", regs, esr); } +void do_el0_mops(struct pt_regs *regs, unsigned long esr) +{ + bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION; + bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A; + int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr); + int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr); + int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr); + unsigned long dst, src, size; + + dst = pt_regs_read_reg(regs, dstreg); + src = pt_regs_read_reg(regs, srcreg); + size = pt_regs_read_reg(regs, sizereg); + + /* + * Put the registers back in the original format suitable for a + * prologue instruction, using the generic return routine from the + * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH. + */ + if (esr & ESR_ELx_MOPS_ISS_MEM_INST) { + /* SET* instruction */ + if (option_a ^ wrong_option) { + /* Format is from Option A; forward set */ + pt_regs_write_reg(regs, dstreg, dst + size); + pt_regs_write_reg(regs, sizereg, -size); + } + } else { + /* CPY* instruction */ + if (!(option_a ^ wrong_option)) { + /* Format is from Option B */ + if (regs->pstate & PSR_N_BIT) { + /* Backward copy */ + pt_regs_write_reg(regs, dstreg, dst - size); + pt_regs_write_reg(regs, srcreg, src - size); + } + } else { + /* Format is from Option A */ + if (size & BIT(63)) { + /* Forward copy */ + pt_regs_write_reg(regs, dstreg, dst + size); + pt_regs_write_reg(regs, srcreg, src + size); + pt_regs_write_reg(regs, sizereg, -size); + } + } + } + + if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE) + regs->pc -= 8; + else + regs->pc -= 4; + + /* + * If single stepping then finish the step before executing the + * prologue instruction. + */ + user_fastforward_single_step(current); +} + #define __user_cache_maint(insn, address, res) \ if (address >= TASK_SIZE_MAX) { \ res = -EFAULT; \ @@ -824,6 +881,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_DABT_LOW] = "DABT (lower EL)", [ESR_ELx_EC_DABT_CUR] = "DABT (current EL)", [ESR_ELx_EC_SP_ALIGN] = "SP Alignment", + [ESR_ELx_EC_MOPS] = "MOPS", [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)", [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)", [ESR_ELx_EC_SERROR] = "SError", @@ -947,7 +1005,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr) } /* GENERIC_BUG traps */ - +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { /* @@ -959,6 +1017,7 @@ int is_valid_bugaddr(unsigned long addr) */ return 1; } +#endif static int bug_handler(struct pt_regs *regs, unsigned long esr) { diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 0119dc91abb5..d9e1355730ef 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -288,7 +288,7 @@ static int aarch32_alloc_kuser_vdso_page(void) memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, kuser_sz); - aarch32_vectors_page = virt_to_page(vdso_page); + aarch32_vectors_page = virt_to_page((void *)vdso_page); return 0; } diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 55f80fb93925..8725291cb00a 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -333,7 +333,7 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) /* Check if we have TRBE implemented and available at the host */ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) && - !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG)) + !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P)) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE); } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 1279949599b5..4c9dcd8fc939 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -81,26 +81,34 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) fpsimd_kvm_prepare(); + /* + * We will check TIF_FOREIGN_FPSTATE just before entering the + * guest in kvm_arch_vcpu_ctxflush_fp() and override this to + * FP_STATE_FREE if the flag set. + */ vcpu->arch.fp_state = FP_STATE_HOST_OWNED; vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) vcpu_set_flag(vcpu, HOST_SVE_ENABLED); - /* - * We don't currently support SME guests but if we leave - * things in streaming mode then when the guest starts running - * FPSIMD or SVE code it may generate SME traps so as a - * special case if we are in streaming mode we force the host - * state to be saved now and exit streaming mode so that we - * don't have to handle any SME traps for valid guest - * operations. Do this for ZA as well for now for simplicity. - */ if (system_supports_sme()) { vcpu_clear_flag(vcpu, HOST_SME_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) vcpu_set_flag(vcpu, HOST_SME_ENABLED); + /* + * If PSTATE.SM is enabled then save any pending FP + * state and disable PSTATE.SM. If we leave PSTATE.SM + * enabled and the guest does not enable SME via + * CPACR_EL1.SMEN then operations that should be valid + * may generate SME traps from EL1 to EL1 which we + * can't intercept and which would confuse the guest. + * + * Do the same for PSTATE.ZA in the case where there + * is state in the registers which has not already + * been saved, this is very unlikely to happen. + */ if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) { vcpu->arch.fp_state = FP_STATE_FREE; fpsimd_save_and_flush_cpu_state(); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index c41166f1a1dd..2f6e0b3e4a75 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -82,8 +82,14 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * EL1 instead of being trapped to EL2. */ if (kvm_arm_support_pmu_v3()) { + struct kvm_cpu_context *hctxt; + write_sysreg(0, pmselr_el0); + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); @@ -106,8 +112,13 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); write_sysreg(0, hstr_el2); - if (kvm_arm_support_pmu_v3()) - write_sysreg(0, pmuserenr_el0); + if (kvm_arm_support_pmu_v3()) { + struct kvm_cpu_context *hctxt; + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); + vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); + } if (cpus_have_final_cap(ARM64_SME)) { sysreg_clear_set_s(SYS_HFGRTR_EL2, 0, @@ -130,6 +141,9 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); + + if (cpus_have_final_cap(ARM64_HAS_HCX)) + write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2); } static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) @@ -144,6 +158,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~HCR_VSE; vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; } + + if (cpus_have_final_cap(ARM64_HAS_HCX)) + write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); } static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) @@ -177,9 +194,17 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) sve_guest = vcpu_has_sve(vcpu); esr_ec = kvm_vcpu_trap_get_class(vcpu); - /* Don't handle SVE traps for non-SVE vcpus here: */ - if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD) + /* Only handle traps the vCPU can support here: */ + switch (esr_ec) { + case ESR_ELx_EC_FP_ASIMD: + break; + case ESR_ELx_EC_SVE: + if (!sve_guest) + return false; + break; + default: return false; + } /* Valid trap. Switch the context: */ @@ -404,17 +429,21 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code) { if (!__populate_fault_info(vcpu)) return true; return false; } +static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) + __alias(kvm_hyp_handle_memory_fault); +static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) + __alias(kvm_hyp_handle_memory_fault); static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) { - if (!__populate_fault_info(vcpu)) + if (kvm_hyp_handle_memory_fault(vcpu, exit_code)) return true; if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 699ea1f8d409..bb6b571ec627 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -44,6 +44,8 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + if (cpus_have_final_cap(ARM64_HAS_TCR2)) + ctxt_sys_reg(ctxt, TCR2_EL1) = read_sysreg_el1(SYS_TCR2); ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); @@ -53,6 +55,10 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR); + ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0); + } ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); @@ -114,6 +120,8 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + if (cpus_have_final_cap(ARM64_HAS_TCR2)) + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1), SYS_TCR2); write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); @@ -123,6 +131,10 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0); + } write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index d756b939f296..4558c02eb352 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -56,7 +56,7 @@ static void __debug_save_trace(u64 *trfcr_el1) *trfcr_el1 = 0; /* Check if the TRBE is enabled */ - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_ENABLE)) + if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E)) return; /* * Prohibit trace generation while we are in guest. diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2e9ec4a2a4a3..a8813b212996 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -575,7 +575,7 @@ struct pkvm_mem_donation { struct check_walk_data { enum pkvm_page_state desired; - enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); + enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr); }; static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, @@ -583,10 +583,7 @@ static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, { struct check_walk_data *d = ctx->arg; - if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old))) - return -EINVAL; - - return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM; + return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM; } static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, @@ -601,8 +598,11 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, return kvm_pgtable_walk(pgt, addr, size, &walker); } -static enum pkvm_page_state host_get_page_state(kvm_pte_t pte) +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr) { + if (!addr_is_allowed_memory(addr)) + return PKVM_NOPAGE; + if (!kvm_pte_valid(pte) && pte) return PKVM_NOPAGE; @@ -709,7 +709,7 @@ static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx return host_stage2_set_owner_locked(addr, size, host_id); } -static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) +static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr) { if (!kvm_pte_valid(pte)) return PKVM_NOPAGE; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 71fa16a0dc77..77791495c995 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -186,6 +186,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; @@ -196,6 +197,7 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3d61bd3e591d..95dae02ccc2e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -58,8 +58,9 @@ struct kvm_pgtable_walk_data { struct kvm_pgtable_walker *walker; + const u64 start; u64 addr; - u64 end; + const u64 end; }; static bool kvm_phys_is_valid(u64 phys) @@ -201,20 +202,33 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, .old = READ_ONCE(*ptep), .arg = data->walker->arg, .mm_ops = mm_ops, + .start = data->start, .addr = data->addr, .end = data->end, .level = level, .flags = flags, }; int ret = 0; + bool reload = false; kvm_pteref_t childp; bool table = kvm_pte_table(ctx.old, level); - if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) + if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) { ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); + reload = true; + } if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); + reload = true; + } + + /* + * Reload the page table after invoking the walker callback for leaf + * entries or after pre-order traversal, to allow the walker to descend + * into a newly installed or replaced table. + */ + if (reload) { ctx.old = READ_ONCE(*ptep); table = kvm_pte_table(ctx.old, level); } @@ -293,6 +307,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker) { struct kvm_pgtable_walk_data walk_data = { + .start = ALIGN_DOWN(addr, PAGE_SIZE), .addr = ALIGN_DOWN(addr, PAGE_SIZE), .end = PAGE_ALIGN(walk_data.addr + size), .walker = walker, @@ -349,7 +364,7 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, } struct hyp_map_data { - u64 phys; + const u64 phys; kvm_pte_t attr; }; @@ -407,13 +422,12 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct hyp_map_data *data) { + u64 phys = data->phys + (ctx->addr - ctx->start); kvm_pte_t new; - u64 granule = kvm_granule_size(ctx->level), phys = data->phys; if (!kvm_block_mapping_supported(ctx, phys)) return false; - data->phys += granule; new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); if (ctx->old == new) return true; @@ -576,7 +590,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) } struct stage2_map_data { - u64 phys; + const u64 phys; kvm_pte_t attr; u8 owner_id; @@ -794,20 +808,43 @@ static bool stage2_pte_executable(kvm_pte_t pte) return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } +static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx, + const struct stage2_map_data *data) +{ + u64 phys = data->phys; + + /* + * Stage-2 walks to update ownership data are communicated to the map + * walker using an invalid PA. Avoid offsetting an already invalid PA, + * which could overflow and make the address valid again. + */ + if (!kvm_phys_is_valid(phys)) + return phys; + + /* + * Otherwise, work out the correct PA based on how far the walk has + * gotten. + */ + return phys + (ctx->addr - ctx->start); +} + static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { + u64 phys = stage2_map_walker_phys_addr(ctx, data); + if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1))) return false; - return kvm_block_mapping_supported(ctx, data->phys); + return kvm_block_mapping_supported(ctx, phys); } static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { kvm_pte_t new; - u64 granule = kvm_granule_size(ctx->level), phys = data->phys; + u64 phys = stage2_map_walker_phys_addr(ctx, data); + u64 granule = kvm_granule_size(ctx->level); struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; @@ -841,8 +878,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, stage2_make_pte(ctx, new); - if (kvm_phys_is_valid(phys)) - data->phys += granule; return 0; } @@ -1297,4 +1332,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg }; WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1)); + + WARN_ON(mm_ops->page_count(pgtable) != 1); + mm_ops->put_page(pgtable); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 3d868e84c7a0..b37e7c96efea 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -92,14 +92,28 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) } NOKPROBE_SYMBOL(__deactivate_traps); +/* + * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to + * prevent a race condition between context switching of PMUSERENR_EL0 + * in __{activate,deactivate}_traps_common() and IPIs that attempts to + * update PMUSERENR_EL0. See also kvm_set_pmuserenr(). + */ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) { + unsigned long flags; + + local_irq_save(flags); __activate_traps_common(vcpu); + local_irq_restore(flags); } void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) { + unsigned long flags; + + local_irq_save(flags); __deactivate_traps_common(vcpu); + local_irq_restore(flags); } static const exit_handler_fn hyp_exit_handlers[] = { @@ -110,6 +124,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 64c3aec0d937..0bd93a5f21ce 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -204,7 +204,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) * Size Fault at level 0, as if exceeding PARange. * * Non-LPAE guests will only get the external abort, as there - * is no way to to describe the ASF. + * is no way to describe the ASF. */ if (vcpu_el1_is_32bit(vcpu) && !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE)) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 45727d50d18d..560650972478 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -694,45 +694,41 @@ out_unlock: static struct arm_pmu *kvm_pmu_probe_armpmu(void) { - struct perf_event_attr attr = { }; - struct perf_event *event; - struct arm_pmu *pmu = NULL; + struct arm_pmu *tmp, *pmu = NULL; + struct arm_pmu_entry *entry; + int cpu; + + mutex_lock(&arm_pmus_lock); /* - * Create a dummy event that only counts user cycles. As we'll never - * leave this function with the event being live, it will never - * count anything. But it allows us to probe some of the PMU - * details. Yes, this is terrible. + * It is safe to use a stale cpu to iterate the list of PMUs so long as + * the same value is used for the entirety of the loop. Given this, and + * the fact that no percpu data is used for the lookup there is no need + * to disable preemption. + * + * It is still necessary to get a valid cpu, though, to probe for the + * default PMU instance as userspace is not required to specify a PMU + * type. In order to uphold the preexisting behavior KVM selects the + * PMU instance for the core where the first call to the + * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case + * would be a user with disdain of all things big.LITTLE that affines + * the VMM to a particular cluster of cores. + * + * In any case, userspace should just do the sane thing and use the UAPI + * to select a PMU type directly. But, be wary of the baggage being + * carried here. */ - attr.type = PERF_TYPE_RAW; - attr.size = sizeof(attr); - attr.pinned = 1; - attr.disabled = 0; - attr.exclude_user = 0; - attr.exclude_kernel = 1; - attr.exclude_hv = 1; - attr.exclude_host = 1; - attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; - attr.sample_period = GENMASK(63, 0); - - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, &attr); - - if (IS_ERR(event)) { - pr_err_once("kvm: pmu event creation failed %ld\n", - PTR_ERR(event)); - return NULL; - } + cpu = raw_smp_processor_id(); + list_for_each_entry(entry, &arm_pmus, entry) { + tmp = entry->arm_pmu; - if (event->pmu) { - pmu = to_arm_pmu(event->pmu); - if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || - pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) - pmu = NULL; + if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) { + pmu = tmp; + break; + } } - perf_event_disable(event); - perf_event_release_kernel(event); + mutex_unlock(&arm_pmus_lock); return pmu; } @@ -912,7 +908,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -EBUSY; if (!kvm->arch.arm_pmu) { - /* No PMU set, get the default one */ + /* + * No PMU set, get the default one. + * + * The observant among you will notice that the supported_cpus + * mask does not get updated for the default PMU even though it + * is quite possible the selected instance supports only a + * subset of cores in the system. This is intentional, and + * upholds the preexisting behavior on heterogeneous systems + * where vCPUs can be scheduled on any core but the guest + * counters could stop working. + */ kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); if (!kvm->arch.arm_pmu) return -ENODEV; diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 7887133d15f0..121f1a14c829 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -209,3 +209,30 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) kvm_vcpu_pmu_enable_el0(events_host); kvm_vcpu_pmu_disable_el0(events_guest); } + +/* + * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU + * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched + * to the value for the guest on vcpu_load(). The value for the host EL0 + * will be restored on vcpu_put(), before returning to userspace. + * This isn't necessary for nVHE, as the register is context switched for + * every guest enter/exit. + * + * Return true if KVM takes care of the register. Otherwise return false. + */ +bool kvm_set_pmuserenr(u64 val) +{ + struct kvm_cpu_context *hctxt; + struct kvm_vcpu *vcpu; + + if (!kvm_arm_support_pmu_v3() || !has_vhe()) + return false; + + vcpu = kvm_get_running_vcpu(); + if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) + return false; + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; + return true; +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 71b12094d613..5b5d5e5449dc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -211,6 +211,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +static bool access_dcgsw(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!kvm_has_mte(vcpu->kvm)) { + kvm_inject_undefined(vcpu); + return false; + } + + /* Treat MTE S/W ops as we treat the classic ones: with contempt */ + return access_dcsw(vcpu, p, r); +} + static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift) { switch (r->aarch32_map) { @@ -388,9 +401,9 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu, return read_from_write_only(vcpu, p, r); /* Forward the OSLK bit to OSLSR */ - oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK; - if (p->regval & SYS_OSLAR_OSLK) - oslsr |= SYS_OSLSR_OSLK; + oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK; + if (p->regval & OSLAR_EL1_OSLK) + oslsr |= OSLSR_EL1_OSLK; __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr; return true; @@ -414,7 +427,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, * The only modifiable bit is the OSLK bit. Refuse the write if * userspace attempts to change any other bit in the register. */ - if ((val ^ rd->val) & ~SYS_OSLSR_OSLK) + if ((val ^ rd->val) & ~OSLSR_EL1_OSLK) return -EINVAL; __vcpu_sys_reg(vcpu, rd->reg) = val; @@ -1252,6 +1265,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); if (!cpus_have_final_cap(ARM64_HAS_WFXT)) val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); + val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ @@ -1756,8 +1770,14 @@ static bool access_spsr(struct kvm_vcpu *vcpu, */ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DC_ISW), access_dcsw }, + { SYS_DESC(SYS_DC_IGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_IGDSW), access_dcgsw }, { SYS_DESC(SYS_DC_CSW), access_dcsw }, + { SYS_DESC(SYS_DC_CGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_CGDSW), access_dcgsw }, { SYS_DESC(SYS_DC_CISW), access_dcsw }, + { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, DBG_BCR_BVR_WCR_WVR_EL1(0), DBG_BCR_BVR_WCR_WVR_EL1(1), @@ -1781,7 +1801,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi }, { SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 }, { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1, - SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, }, + OSLSR_EL1_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, }, { SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi }, @@ -1872,7 +1892,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_AA64MMFR0_EL1), ID_SANITISED(ID_AA64MMFR1_EL1), ID_SANITISED(ID_AA64MMFR2_EL1), - ID_UNALLOCATED(7,3), + ID_SANITISED(ID_AA64MMFR3_EL1), ID_UNALLOCATED(7,4), ID_UNALLOCATED(7,5), ID_UNALLOCATED(7,6), @@ -1892,6 +1912,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, + { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0 }, PTRAUTH_KEY(APIA), PTRAUTH_KEY(APIB), @@ -1941,6 +1962,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, + { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 }, + { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 9d42c7cb2b58..c8c3cb812783 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -235,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * KVM io device for the redistributor that belongs to this VCPU. */ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - mutex_lock(&vcpu->kvm->arch.config_lock); + mutex_lock(&vcpu->kvm->slots_lock); ret = vgic_register_redist_iodev(vcpu); - mutex_unlock(&vcpu->kvm->arch.config_lock); + mutex_unlock(&vcpu->kvm->slots_lock); } return ret; } @@ -406,7 +406,7 @@ void kvm_vgic_destroy(struct kvm *kvm) /** * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest - * is a GICv2. A GICv3 must be explicitly initialized by the guest using the + * is a GICv2. A GICv3 must be explicitly initialized by userspace using the * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. * @kvm: kvm struct pointer */ @@ -446,11 +446,14 @@ int vgic_lazy_init(struct kvm *kvm) int kvm_vgic_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + enum vgic_type type; + gpa_t dist_base; int ret = 0; if (likely(vgic_ready(kvm))) return 0; + mutex_lock(&kvm->slots_lock); mutex_lock(&kvm->arch.config_lock); if (vgic_ready(kvm)) goto out; @@ -458,18 +461,33 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (!irqchip_in_kernel(kvm)) goto out; - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { ret = vgic_v2_map_resources(kvm); - else + type = VGIC_V2; + } else { ret = vgic_v3_map_resources(kvm); + type = VGIC_V3; + } - if (ret) + if (ret) { __kvm_vgic_destroy(kvm); - else - dist->ready = true; + goto out; + } + dist->ready = true; + dist_base = dist->vgic_dist_base; + mutex_unlock(&kvm->arch.config_lock); + + ret = vgic_register_dist_iodev(kvm, dist_base, type); + if (ret) { + kvm_err("Unable to register VGIC dist MMIO regions\n"); + kvm_vgic_destroy(kvm); + } + mutex_unlock(&kvm->slots_lock); + return ret; out: mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 750e51e3779a..5fe2365a629f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1936,6 +1936,7 @@ void vgic_lpi_translation_cache_destroy(struct kvm *kvm) static int vgic_its_create(struct kvm_device *dev, u32 type) { + int ret; struct vgic_its *its; if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) @@ -1945,9 +1946,12 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (!its) return -ENOMEM; + mutex_lock(&dev->kvm->arch.config_lock); + if (vgic_initialized(dev->kvm)) { - int ret = vgic_v4_init(dev->kvm); + ret = vgic_v4_init(dev->kvm); if (ret < 0) { + mutex_unlock(&dev->kvm->arch.config_lock); kfree(its); return ret; } @@ -1960,12 +1964,10 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) /* Yep, even more trickery for lock ordering... */ #ifdef CONFIG_LOCKDEP - mutex_lock(&dev->kvm->arch.config_lock); mutex_lock(&its->cmd_lock); mutex_lock(&its->its_lock); mutex_unlock(&its->its_lock); mutex_unlock(&its->cmd_lock); - mutex_unlock(&dev->kvm->arch.config_lock); #endif its->vgic_its_base = VGIC_ADDR_UNDEF; @@ -1986,7 +1988,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) dev->private = its; - return vgic_its_set_abi(its, NR_ITS_ABIS - 1); + ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1); + + mutex_unlock(&dev->kvm->arch.config_lock); + + return ret; } static void vgic_its_destroy(struct kvm_device *kvm_dev) diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 35cfa268fd5d..212b73a715c1 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -102,7 +102,11 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri if (get_user(addr, uaddr)) return -EFAULT; - mutex_lock(&kvm->arch.config_lock); + /* + * Since we can't hold config_lock while registering the redistributor + * iodevs, take the slots_lock immediately. + */ + mutex_lock(&kvm->slots_lock); switch (attr->attr) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); @@ -182,6 +186,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri if (r) goto out; + mutex_lock(&kvm->arch.config_lock); if (write) { r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size); if (!r) @@ -189,9 +194,10 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri } else { addr = *addr_ptr; } + mutex_unlock(&kvm->arch.config_lock); out: - mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); if (!r && !write) r = put_user(addr, uaddr); diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 472b18ac92a2..188d2187eede 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -769,10 +769,13 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; struct vgic_redist_region *rdreg; gpa_t rd_base; - int ret; + int ret = 0; + + lockdep_assert_held(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) - return 0; + goto out_unlock; /* * We may be creating VCPUs before having set the base address for the @@ -782,10 +785,12 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) */ rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); if (!rdreg) - return 0; + goto out_unlock; - if (!vgic_v3_check_base(kvm)) - return -EINVAL; + if (!vgic_v3_check_base(kvm)) { + ret = -EINVAL; + goto out_unlock; + } vgic_cpu->rdreg = rdreg; vgic_cpu->rdreg_index = rdreg->free_index; @@ -799,16 +804,20 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); rd_dev->redist_vcpu = vcpu; - mutex_lock(&kvm->slots_lock); + mutex_unlock(&kvm->arch.config_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, 2 * SZ_64K, &rd_dev->dev); - mutex_unlock(&kvm->slots_lock); - if (ret) return ret; + /* Protected by slots_lock */ rdreg->free_index++; return 0; + +out_unlock: + mutex_unlock(&kvm->arch.config_lock); + return ret; } static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) @@ -834,12 +843,10 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) /* The current c failed, so iterate over the previous ones. */ int i; - mutex_lock(&kvm->slots_lock); for (i = 0; i < c; i++) { vcpu = kvm_get_vcpu(kvm, i); vgic_unregister_redist_iodev(vcpu); } - mutex_unlock(&kvm->slots_lock); } return ret; @@ -938,7 +945,9 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) { int ret; + mutex_lock(&kvm->arch.config_lock); ret = vgic_v3_alloc_redist_region(kvm, index, addr, count); + mutex_unlock(&kvm->arch.config_lock); if (ret) return ret; @@ -950,8 +959,10 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) if (ret) { struct vgic_redist_region *rdreg; + mutex_lock(&kvm->arch.config_lock); rdreg = vgic_v3_rdist_region_from_index(kvm, index); vgic_v3_free_redist_region(rdreg); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 1939c94e0b24..ff558c05e990 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -1096,7 +1096,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, enum vgic_type type) { struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; - int ret = 0; unsigned int len; switch (type) { @@ -1114,10 +1113,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, io_device->iodev_type = IODEV_DIST; io_device->redist_vcpu = NULL; - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, - len, &io_device->dev); - mutex_unlock(&kvm->slots_lock); - - return ret; + return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, + len, &io_device->dev); } diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 645648349c99..7e9cdb78f7ce 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -312,12 +312,6 @@ int vgic_v2_map_resources(struct kvm *kvm) return ret; } - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); - if (ret) { - kvm_err("Unable to register VGIC MMIO regions\n"); - return ret; - } - if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, kvm_vgic_global_state.vcpu_base, diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 469d816f356f..c3b8e132d599 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -539,7 +539,6 @@ int vgic_v3_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int ret = 0; unsigned long c; kvm_for_each_vcpu(c, vcpu, kvm) { @@ -569,12 +568,6 @@ int vgic_v3_map_resources(struct kvm *kvm) return -EBUSY; } - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); - if (ret) { - kvm_err("Unable to register VGICv3 dist MMIO regions\n"); - return ret; - } - if (kvm_vgic_global_state.has_gicv4_1) vgic_v4_configure_vsgis(kvm); @@ -616,6 +609,10 @@ static const struct midr_range broken_seis[] = { MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX), {}, }; diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 3bb003478060..c1c28fe680ba 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -184,13 +184,14 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) } } -/* Must be called with the kvm lock held */ void vgic_v4_configure_vsgis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + kvm_arm_halt_guest(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 08978d0672e7..7fe8ba1a2851 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -47,7 +47,7 @@ static void flush_context(void) int cpu; u64 vmid; - bitmap_clear(vmid_map, 0, NUM_USER_VMIDS); + bitmap_zero(vmid_map, NUM_USER_VMIDS); for_each_possible_cpu(cpu) { vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0); @@ -182,8 +182,7 @@ int __init kvm_arm_vmid_alloc_init(void) */ WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus()); atomic64_set(&vmid_generation, VMID_FIRST_VERSION); - vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS), - sizeof(*vmid_map), GFP_KERNEL); + vmid_map = bitmap_zalloc(NUM_USER_VMIDS, GFP_KERNEL); if (!vmid_map) return -ENOMEM; @@ -192,5 +191,5 @@ int __init kvm_arm_vmid_alloc_init(void) void __init kvm_arm_vmid_alloc_free(void) { - kfree(vmid_map); + bitmap_free(vmid_map); } diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c index 96b171995d19..f9a53b7f9842 100644 --- a/arch/arm64/lib/xor-neon.c +++ b/arch/arm64/lib/xor-neon.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <asm/neon-intrinsics.h> -void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { uint64_t *dp1 = (uint64_t *)p1; @@ -37,7 +37,7 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3) { @@ -73,7 +73,7 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4) @@ -118,7 +118,7 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4, diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e1e0dca01839..188197590fc9 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -364,8 +364,8 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) ttbr1 &= ~TTBR_ASID_MASK; ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + cpu_set_reserved_ttbr0_nosync(); write_sysreg(ttbr1, ttbr1_el1); - isb(); write_sysreg(ttbr0, ttbr0_el1); isb(); post_ttbr_update_workaround(); diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 4aadcfb01754..a7bb20055ce0 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -21,9 +21,10 @@ void copy_highpage(struct page *to, struct page *from) copy_page(kto, kfrom); + if (kasan_hw_tags_enabled()) + page_kasan_tag_reset(to); + if (system_supports_mte() && page_mte_tagged(from)) { - if (kasan_hw_tags_enabled()) - page_kasan_tag_reset(to); /* It's a new page, shouldn't have been tagged yet */ WARN_ON_ONCE(!try_page_mte_tagging(to)); mte_copy_page_tags(kto, kfrom); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 9e0db5c387e3..c85b6d70b222 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -66,6 +66,8 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr static void data_abort_decode(unsigned long esr) { + unsigned long iss2 = ESR_ELx_ISS2(esr); + pr_alert("Data abort info:\n"); if (esr & ESR_ELx_ISV) { @@ -78,12 +80,21 @@ static void data_abort_decode(unsigned long esr) (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT, (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT); } else { - pr_alert(" ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK); + pr_alert(" ISV = 0, ISS = 0x%08lx, ISS2 = 0x%08lx\n", + esr & ESR_ELx_ISS_MASK, iss2); } - pr_alert(" CM = %lu, WnR = %lu\n", + pr_alert(" CM = %lu, WnR = %lu, TnD = %lu, TagAccess = %lu\n", (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT, - (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT); + (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT, + (iss2 & ESR_ELx_TnD) >> ESR_ELx_TnD_SHIFT, + (iss2 & ESR_ELx_TagAccess) >> ESR_ELx_TagAccess_SHIFT); + + pr_alert(" GCS = %ld, Overlay = %lu, DirtyBit = %lu, Xs = %llu\n", + (iss2 & ESR_ELx_GCS) >> ESR_ELx_GCS_SHIFT, + (iss2 & ESR_ELx_Overlay) >> ESR_ELx_Overlay_SHIFT, + (iss2 & ESR_ELx_DirtyBit) >> ESR_ELx_DirtyBit_SHIFT, + (iss2 & ESR_ELx_Xs_MASK) >> ESR_ELx_Xs_SHIFT); } static void mem_abort_decode(unsigned long esr) @@ -480,8 +491,8 @@ static void do_bad_area(unsigned long far, unsigned long esr, } } -#define VM_FAULT_BADMAP 0x010000 -#define VM_FAULT_BADACCESS 0x020000 +#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000) +#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000) static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, @@ -600,8 +611,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, vma_end_read(vma); goto lock_mmap; } - fault = handle_mm_fault(vma, addr & PAGE_MASK, - mm_flags | FAULT_FLAG_VMA_LOCK, regs); + fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); vma_end_read(vma); if (!(fault & VM_FAULT_RETRY)) { @@ -886,9 +896,6 @@ void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_sp_pc_abort); -int __init early_brk64(unsigned long addr, unsigned long esr, - struct pt_regs *regs); - /* * __refdata because early_brk64 is __init, but the reference to it is * clobbered at arch_initcall time. diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 5f9379b3c8c8..4e6476094952 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <linux/mm.h> +#include <linux/libnvdimm.h> #include <linux/pagemap.h> #include <asm/cacheflush.h> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 66e70ca47680..c28c2c8483cc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -69,6 +69,7 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; #define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit #define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) +#define CRASH_HIGH_SEARCH_BASE SZ_4G #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) @@ -101,12 +102,13 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) */ static void __init reserve_crashkernel(void) { - unsigned long long crash_base, crash_size; - unsigned long long crash_low_size = 0; + unsigned long long crash_low_size = 0, search_base = 0; unsigned long long crash_max = CRASH_ADDR_LOW_MAX; + unsigned long long crash_base, crash_size; char *cmdline = boot_command_line; - int ret; bool fixed_base = false; + bool high = false; + int ret; if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; @@ -129,7 +131,9 @@ static void __init reserve_crashkernel(void) else if (ret) return; + search_base = CRASH_HIGH_SEARCH_BASE; crash_max = CRASH_ADDR_HIGH_MAX; + high = true; } else if (ret || !crash_size) { /* The specified value is invalid */ return; @@ -140,31 +144,51 @@ static void __init reserve_crashkernel(void) /* User specifies base address explicitly. */ if (crash_base) { fixed_base = true; + search_base = crash_base; crash_max = crash_base + crash_size; } retry: crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, - crash_base, crash_max); + search_base, crash_max); if (!crash_base) { /* - * If the first attempt was for low memory, fall back to - * high memory, the minimum required low memory will be - * reserved later. + * For crashkernel=size[KMG]@offset[KMG], print out failure + * message if can't reserve the specified region. */ - if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { + if (fixed_base) { + pr_warn("crashkernel reservation failed - memory is in use.\n"); + return; + } + + /* + * For crashkernel=size[KMG], if the first attempt was for + * low memory, fall back to high memory, the minimum required + * low memory will be reserved later. + */ + if (!high && crash_max == CRASH_ADDR_LOW_MAX) { crash_max = CRASH_ADDR_HIGH_MAX; + search_base = CRASH_ADDR_LOW_MAX; crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; goto retry; } + /* + * For crashkernel=size[KMG],high, if the first attempt was + * for high memory, fall back to low memory. + */ + if (high && crash_max == CRASH_ADDR_HIGH_MAX) { + crash_max = CRASH_ADDR_LOW_MAX; + search_base = 0; + goto retry; + } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); return; } - if ((crash_base > CRASH_ADDR_LOW_MAX - crash_low_size) && - crash_low_size && reserve_crashkernel_low(crash_low_size)) { + if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size && + reserve_crashkernel_low(crash_low_size)) { memblock_phys_free(crash_base, crash_size); return; } diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index e969e68de005..f17d066e85eb 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -214,7 +214,7 @@ static void __init clear_pgds(unsigned long start, static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; - u64 mod_shadow_start, mod_shadow_end; + u64 mod_shadow_start; u64 vmalloc_shadow_end; phys_addr_t pa_start, pa_end; u64 i; @@ -223,7 +223,6 @@ static void __init kasan_init_shadow(void) kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END)); mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); - mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END); @@ -246,17 +245,9 @@ static void __init kasan_init_shadow(void) kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - BUILD_BUG_ON(VMALLOC_START != MODULES_END); - kasan_populate_early_shadow((void *)vmalloc_shadow_end, - (void *)KASAN_SHADOW_END); - } else { - kasan_populate_early_shadow((void *)kimg_shadow_end, - (void *)KASAN_SHADOW_END); - if (kimg_shadow_start > mod_shadow_end) - kasan_populate_early_shadow((void *)mod_shadow_end, - (void *)kimg_shadow_start); - } + BUILD_BUG_ON(VMALLOC_START != MODULES_END); + kasan_populate_early_shadow((void *)vmalloc_shadow_end, + (void *)KASAN_SHADOW_END); for_each_mem_range(i, &pa_start, &pa_end) { void *start = (void *)__phys_to_virt(pa_start); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index af6bc8403ee4..95d360805f8a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -451,7 +451,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + if (virt < PAGE_OFFSET) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -478,7 +478,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + if (virt < PAGE_OFFSET) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -663,12 +663,17 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, vm_area_add_early(vma); } +static pgprot_t kernel_exec_prot(void) +{ + return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; +} + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { int i; - pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + pgprot_t prot = kernel_exec_prot(); phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); /* The trampoline is always mapped and can therefore be global */ @@ -723,7 +728,7 @@ static void __init map_kernel(pgd_t *pgdp) * mapping to install SW breakpoints. Allow this (only) when * explicitly requested with rodata=off. */ - pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + pgprot_t text_prot = kernel_exec_prot(); /* * If we have a CPU that supports BTI and a kernel built for diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c2cb437821ca..2baeec419f62 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -199,7 +199,7 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE) .pushsection ".idmap.text", "a" @@ -290,7 +290,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings) isb mov temp_pte, x5 - mov pte_flags, #KPTI_NG_PTE_FLAGS + mov_q pte_flags, KPTI_NG_PTE_FLAGS /* Everybody is enjoying the idmap, so we can rewrite swapper. */ /* PGD */ @@ -454,6 +454,21 @@ SYM_FUNC_START(__cpu_setup) #endif /* CONFIG_ARM64_HW_AFDBM */ msr mair_el1, mair msr tcr_el1, tcr + + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lskip_indirection + + mov_q x0, PIE_E0 + msr REG_PIRE0_EL1, x0 + mov_q x0, PIE_E1 + msr REG_PIR_EL1, x0 + + mov x0, TCR2_EL1x_PIE + msr REG_TCR2_EL1, x0 + +.Lskip_indirection: + /* * Prepare SCTLR */ diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 40ba95472594..19c23c4fa2da 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -32,16 +32,20 @@ HAS_GENERIC_AUTH_IMP_DEF HAS_GIC_CPUIF_SYSREGS HAS_GIC_PRIO_MASKING HAS_GIC_PRIO_RELAXED_SYNC +HAS_HCX HAS_LDAPR HAS_LSE_ATOMICS +HAS_MOPS HAS_NESTED_VIRT HAS_NO_FPSIMD HAS_NO_HW_PREFETCH HAS_PAN +HAS_S1PIE HAS_RAS_EXTN HAS_RNG HAS_SB HAS_STAGE2_FWB +HAS_TCR2 HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk index 00c9e72a200a..8525980379d7 100755 --- a/arch/arm64/tools/gen-cpucaps.awk +++ b/arch/arm64/tools/gen-cpucaps.awk @@ -24,12 +24,12 @@ BEGIN { } /^[vA-Z0-9_]+$/ { - printf("#define ARM64_%-30s\t%d\n", $0, cap_num++) + printf("#define ARM64_%-40s\t%d\n", $0, cap_num++) next } END { - printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num) + printf("#define ARM64_NCAPS\t\t\t\t\t%d\n", cap_num) print "" print "#endif /* __ASM_CPUCAPS_H */" } diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index c9a0d1fa3209..1ea4a3dc68f8 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -48,6 +48,61 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. +Sysreg OSDTRRX_EL1 2 0 0 0 2 +Res0 63:32 +Field 31:0 DTRRX +EndSysreg + +Sysreg MDCCINT_EL1 2 0 0 2 0 +Res0 63:31 +Field 30 RX +Field 29 TX +Res0 28:0 +EndSysreg + +Sysreg MDSCR_EL1 2 0 0 2 2 +Res0 63:36 +Field 35 EHBWE +Field 34 EnSPM +Field 33 TTA +Field 32 EMBWE +Field 31 TFO +Field 30 RXfull +Field 29 TXfull +Res0 28 +Field 27 RXO +Field 26 TXU +Res0 25:24 +Field 23:22 INTdis +Field 21 TDA +Res0 20 +Field 19 SC2 +Res0 18:16 +Field 15 MDE +Field 14 HDE +Field 13 KDE +Field 12 TDCC +Res0 11:7 +Field 6 ERR +Res0 5:1 +Field 0 SS +EndSysreg + +Sysreg OSDTRTX_EL1 2 0 0 3 2 +Res0 63:32 +Field 31:0 DTRTX +EndSysreg + +Sysreg OSECCR_EL1 2 0 0 6 2 +Res0 63:32 +Field 31:0 EDECCR +EndSysreg + +Sysreg OSLAR_EL1 2 0 1 0 4 +Res0 63:1 +Field 0 OSLK +EndSysreg + Sysreg ID_PFR0_EL1 3 0 0 1 0 Res0 63:32 UnsignedEnum 31:28 RAS @@ -1538,6 +1593,78 @@ UnsignedEnum 3:0 CnP EndEnum EndSysreg +Sysreg ID_AA64MMFR3_EL1 3 0 0 7 3 +UnsignedEnum 63:60 Spec_FPACC + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 59:56 ADERR + 0b0000 NI + 0b0001 DEV_ASYNC + 0b0010 FEAT_ADERR + 0b0011 FEAT_ADERR_IND +EndEnum +UnsignedEnum 55:52 SDERR + 0b0000 NI + 0b0001 DEV_SYNC + 0b0010 FEAT_ADERR + 0b0011 FEAT_ADERR_IND +EndEnum +Res0 51:48 +UnsignedEnum 47:44 ANERR + 0b0000 NI + 0b0001 ASYNC + 0b0010 FEAT_ANERR + 0b0011 FEAT_ANERR_IND +EndEnum +UnsignedEnum 43:40 SNERR + 0b0000 NI + 0b0001 SYNC + 0b0010 FEAT_ANERR + 0b0011 FEAT_ANERR_IND +EndEnum +UnsignedEnum 39:36 D128_2 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 35:32 D128 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 31:28 MEC + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 27:24 AIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 23:20 S2POE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 19:16 S1POE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 15:12 S2PIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 11:8 S1PIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 7:4 SCTLRX + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 3:0 TCRX + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINTMASK @@ -2034,7 +2161,17 @@ Fields ZCR_ELx EndSysreg Sysreg HCRX_EL2 3 4 1 2 2 -Res0 63:12 +Res0 63:23 +Field 22 GCSEn +Field 21 EnIDCP128 +Field 20 EnSDERR +Field 19 TMEA +Field 18 EnSNERR +Field 17 D128En +Field 16 PTTWI +Field 15 SCTLR2En +Field 14 TCR2En +Res0 13:12 Field 11 MSCEn Field 10 MCE2 Field 9 CMOW @@ -2153,6 +2290,87 @@ Sysreg TTBR1_EL1 3 0 2 0 1 Fields TTBRx_EL1 EndSysreg +SysregFields TCR2_EL1x +Res0 63:16 +Field 15 DisCH1 +Field 14 DisCH0 +Res0 13:12 +Field 11 HAFT +Field 10 PTTWI +Res0 9:6 +Field 5 D128 +Field 4 AIE +Field 3 POE +Field 2 E0POE +Field 1 PIE +Field 0 PnCH +EndSysregFields + +Sysreg TCR2_EL1 3 0 2 0 3 +Fields TCR2_EL1x +EndSysreg + +Sysreg TCR2_EL12 3 5 2 0 3 +Fields TCR2_EL1x +EndSysreg + +Sysreg TCR2_EL2 3 4 2 0 3 +Res0 63:16 +Field 15 DisCH1 +Field 14 DisCH0 +Field 13 AMEC1 +Field 12 AMEC0 +Field 11 HAFT +Field 10 PTTWI +Field 9:8 SKL1 +Field 7:6 SKL0 +Field 5 D128 +Field 4 AIE +Field 3 POE +Field 2 E0POE +Field 1 PIE +Field 0 PnCH +EndSysreg + +SysregFields PIRx_ELx +Field 63:60 Perm15 +Field 59:56 Perm14 +Field 55:52 Perm13 +Field 51:48 Perm12 +Field 47:44 Perm11 +Field 43:40 Perm10 +Field 39:36 Perm9 +Field 35:32 Perm8 +Field 31:28 Perm7 +Field 27:24 Perm6 +Field 23:20 Perm5 +Field 19:16 Perm4 +Field 15:12 Perm3 +Field 11:8 Perm2 +Field 7:4 Perm1 +Field 3:0 Perm0 +EndSysregFields + +Sysreg PIRE0_EL1 3 0 10 2 2 +Fields PIRx_ELx +EndSysreg + +Sysreg PIRE0_EL12 3 5 10 2 2 +Fields PIRx_ELx +EndSysreg + +Sysreg PIR_EL1 3 0 10 2 3 +Fields PIRx_ELx +EndSysreg + +Sysreg PIR_EL12 3 5 10 2 3 +Fields PIRx_ELx +EndSysreg + +Sysreg PIR_EL2 3 4 10 2 3 +Fields PIRx_ELx +EndSysreg + Sysreg LORSA_EL1 3 0 10 4 0 Res0 63:52 Field 51:16 SA @@ -2200,3 +2418,80 @@ Sysreg ICC_NMIAR1_EL1 3 0 12 9 5 Res0 63:24 Field 23:0 INTID EndSysreg + +Sysreg TRBLIMITR_EL1 3 0 9 11 0 +Field 63:12 LIMIT +Res0 11:7 +Field 6 XE +Field 5 nVM +Enum 4:3 TM + 0b00 STOP + 0b01 IRQ + 0b11 IGNR +EndEnum +Enum 2:1 FM + 0b00 FILL + 0b01 WRAP + 0b11 CBUF +EndEnum +Field 0 E +EndSysreg + +Sysreg TRBPTR_EL1 3 0 9 11 1 +Field 63:0 PTR +EndSysreg + +Sysreg TRBBASER_EL1 3 0 9 11 2 +Field 63:12 BASE +Res0 11:0 +EndSysreg + +Sysreg TRBSR_EL1 3 0 9 11 3 +Res0 63:56 +Field 55:32 MSS2 +Field 31:26 EC +Res0 25:24 +Field 23 DAT +Field 22 IRQ +Field 21 TRG +Field 20 WRAP +Res0 19 +Field 18 EA +Field 17 S +Res0 16 +Field 15:0 MSS +EndSysreg + +Sysreg TRBMAR_EL1 3 0 9 11 4 +Res0 63:12 +Enum 11:10 PAS + 0b00 SECURE + 0b01 NON_SECURE + 0b10 ROOT + 0b11 REALM +EndEnum +Enum 9:8 SH + 0b00 NON_SHAREABLE + 0b10 OUTER_SHAREABLE + 0b11 INNER_SHAREABLE +EndEnum +Field 7:0 Attr +EndSysreg + +Sysreg TRBTRG_EL1 3 0 9 11 6 +Res0 63:32 +Field 31:0 TRG +EndSysreg + +Sysreg TRBIDR_EL1 3 0 9 11 7 +Res0 63:12 +Enum 11:8 EA + 0b0000 NON_DESC + 0b0001 IGNORE + 0b0010 SERROR +EndEnum +Res0 7:6 +Field 5 F +Field 4 P +Field 3:0 Align +EndSysreg diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 4df1f8c9d170..95f1e9bfd1c7 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -96,6 +96,7 @@ config CSKY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA if MODULES select OF diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h index 60406ef9c2bb..4dab44f6143a 100644 --- a/arch/csky/include/asm/atomic.h +++ b/arch/csky/include/asm/atomic.h @@ -195,41 +195,6 @@ arch_atomic_dec_if_positive(atomic_t *v) } #define arch_atomic_dec_if_positive arch_atomic_dec_if_positive -#define ATOMIC_OP() \ -static __always_inline \ -int arch_atomic_xchg_relaxed(atomic_t *v, int n) \ -{ \ - return __xchg_relaxed(n, &(v->counter), 4); \ -} \ -static __always_inline \ -int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \ -{ \ - return __cmpxchg_relaxed(&(v->counter), o, n, 4); \ -} \ -static __always_inline \ -int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n) \ -{ \ - return __cmpxchg_acquire(&(v->counter), o, n, 4); \ -} \ -static __always_inline \ -int arch_atomic_cmpxchg(atomic_t *v, int o, int n) \ -{ \ - return __cmpxchg(&(v->counter), o, n, 4); \ -} - -#define ATOMIC_OPS() \ - ATOMIC_OP() - -ATOMIC_OPS() - -#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed -#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed -#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire -#define arch_atomic_cmpxchg arch_atomic_cmpxchg - -#undef ATOMIC_OPS -#undef ATOMIC_OP - #else #include <asm-generic/atomic.h> #endif diff --git a/arch/csky/include/asm/smp.h b/arch/csky/include/asm/smp.h index 668b79ce29ea..d3db334f3196 100644 --- a/arch/csky/include/asm/smp.h +++ b/arch/csky/include/asm/smp.h @@ -23,7 +23,7 @@ void __init set_send_ipi(void (*func)(const struct cpumask *mask), int irq); int __cpu_disable(void); -void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } #endif /* CONFIG_SMP */ diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index b12e2c3c387f..8e42352cbf12 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -291,12 +291,8 @@ int __cpu_disable(void) return 0; } -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: shutdown failed\n", cpu); - return; - } pr_notice("CPU%u: shutdown\n", cpu); } @@ -304,7 +300,7 @@ void __noreturn arch_cpu_idle_dead(void) { idle_task_exit(); - cpu_report_death(); + cpuhp_ap_report_dead(); while (!secondary_stack) arch_cpu_idle(); diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 6e94f8d04146..2447d083c432 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -28,58 +28,8 @@ static inline void arch_atomic_set(atomic_t *v, int new) #define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) -/** - * arch_atomic_read - reads a word, atomically - * @v: pointer to atomic value - * - * Assumes all word reads on our architecture are atomic. - */ #define arch_atomic_read(v) READ_ONCE((v)->counter) -/** - * arch_atomic_xchg - atomic - * @v: pointer to memory to change - * @new: new value (technically passed in a register -- see xchg) - */ -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), (new))) - - -/** - * arch_atomic_cmpxchg - atomic compare-and-exchange values - * @v: pointer to value to change - * @old: desired old value to match - * @new: new value to put in - * - * Parameters are then pointer, value-in-register, value-in-register, - * and the output is the old value. - * - * Apparently this is complicated for archs that don't support - * the memw_locked like we do (or it's broken or whatever). - * - * Kind of the lynchpin of the rest of the generically defined routines. - * Remember V2 had that bug with dotnew predicate set by memw_locked. - * - * "old" is "expected" old val, __oldval is actual old value - */ -static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) -{ - int __oldval; - - asm volatile( - "1: %0 = memw_locked(%1);\n" - " { P0 = cmp.eq(%0,%2);\n" - " if (!P0.new) jump:nt 2f; }\n" - " memw_locked(%1,P0) = %3;\n" - " if (!P0) jump 1b;\n" - "2:\n" - : "=&r" (__oldval) - : "r" (&v->counter), "r" (old), "r" (new) - : "memory", "p0" - ); - - return __oldval; -} - #define ATOMIC_OP(op) \ static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ @@ -135,6 +85,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -142,21 +97,15 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -/** - * arch_atomic_fetch_add_unless - add unless the number is a given value - * @v: pointer to value - * @a: amount to add - * @u: unless value is equal to u - * - * Returns old value. - * - */ - static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) { int __oldval; diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 21fa63ce5ffc..2cd93e6bf0fe 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -9,6 +9,7 @@ menu "Processor type and features" config IA64 bool select ARCH_BINFMT_ELF_EXTRA_PHDRS + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 266c429b9137..6540a628d257 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -207,13 +207,6 @@ ATOMIC64_FETCH_OP(xor, ^) #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP -#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), old, new)) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - -#define arch_atomic64_cmpxchg(v, old, new) \ - (arch_cmpxchg(&((v)->counter), old, new)) -#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #define arch_atomic_add(i,v) (void)arch_atomic_add_return((i), (v)) #define arch_atomic_sub(i,v) (void)arch_atomic_sub_return((i), (v)) diff --git a/arch/ia64/include/asm/bugs.h b/arch/ia64/include/asm/bugs.h deleted file mode 100644 index 0d6b9bded56c..000000000000 --- a/arch/ia64/include/asm/bugs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - * - * Based on <asm-alpha/bugs.h>. - * - * Modified 1998, 1999, 2003 - * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co. - */ -#ifndef _ASM_IA64_BUGS_H -#define _ASM_IA64_BUGS_H - -#include <asm/processor.h> - -extern void check_bugs (void); - -#endif /* _ASM_IA64_BUGS_H */ diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index c05728044272..5a55ac82c13a 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -627,7 +627,7 @@ setup_arch (char **cmdline_p) * is physical disk 1 partition 1 and the Linux root disk is * physical disk 1 partition 2. */ - ROOT_DEV = Root_SDA2; /* default to second partition on first drive */ + ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2); if (is_uv_system()) uv_setup(cmdline_p); @@ -1067,8 +1067,7 @@ cpu_init (void) } } -void __init -check_bugs (void) +void __init arch_cpu_finalize_init(void) { ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, (unsigned long) __end___mckinley_e9_bundles); diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d38b066fc931..cbab4f9ca15c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -10,6 +10,7 @@ config LOONGARCH select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index 6b9aca9ab6e9..e27f0c72d324 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -29,21 +29,7 @@ #define ATOMIC_INIT(i) { (i) } -/* - * arch_atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ #define arch_atomic_read(v) READ_ONCE((v)->counter) - -/* - * arch_atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ #define arch_atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) #define ATOMIC_OP(op, I, asm_op) \ @@ -139,14 +125,6 @@ static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless -/* - * arch_atomic_sub_if_positive - conditionally subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically test @v and subtract @i if @v is greater or equal than @i. - * The function returns the old value of @v minus @i. - */ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) { int result; @@ -181,31 +159,13 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) return result; } -#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), (new))) - -/* - * arch_atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - */ #define arch_atomic_dec_if_positive(v) arch_atomic_sub_if_positive(1, v) #ifdef CONFIG_64BIT #define ATOMIC64_INIT(i) { (i) } -/* - * arch_atomic64_read - read atomic variable - * @v: pointer of type atomic64_t - * - */ #define arch_atomic64_read(v) READ_ONCE((v)->counter) - -/* - * arch_atomic64_set - set atomic variable - * @v: pointer of type atomic64_t - * @i: required value - */ #define arch_atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) #define ATOMIC64_OP(op, I, asm_op) \ @@ -300,14 +260,6 @@ static inline long arch_atomic64_fetch_add_unless(atomic64_t *v, long a, long u) } #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless -/* - * arch_atomic64_sub_if_positive - conditionally subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic64_t - * - * Atomically test @v and subtract @i if @v is greater or equal than @i. - * The function returns the old value of @v minus @i. - */ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v) { long result; @@ -342,14 +294,6 @@ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v) return result; } -#define arch_atomic64_cmpxchg(v, o, n) \ - ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), (new))) - -/* - * arch_atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - */ #define arch_atomic64_dec_if_positive(v) arch_atomic64_sub_if_positive(1, v) #endif /* CONFIG_64BIT */ diff --git a/arch/loongarch/include/asm/bugs.h b/arch/loongarch/include/asm/bugs.h deleted file mode 100644 index 98396535163b..000000000000 --- a/arch/loongarch/include/asm/bugs.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _ASM_BUGS_H -#define _ASM_BUGS_H - -#include <asm/cpu.h> -#include <asm/cpu-info.h> - -extern void check_bugs(void); - -#endif /* _ASM_BUGS_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index b3323ab5b78d..1c2a0a2c8830 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1167,7 +1167,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #ifndef __ASSEMBLY__ -static inline u64 drdtime(void) +static __always_inline u64 drdtime(void) { int rID = 0; u64 val = 0; @@ -1496,7 +1496,7 @@ __BUILD_CSR_OP(tlbidx) #define write_fcsr(dest, val) \ do { \ __asm__ __volatile__( \ - " movgr2fcsr %0, "__stringify(dest)" \n" \ + " movgr2fcsr "__stringify(dest)", %0 \n" \ : : "r" (val)); \ } while (0) diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 8b98d22a145b..de46a6b1e9f1 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -22,12 +22,14 @@ #define _PAGE_PFN_SHIFT 12 #define _PAGE_SWP_EXCLUSIVE_SHIFT 23 #define _PAGE_PFN_END_SHIFT 48 +#define _PAGE_PRESENT_INVALID_SHIFT 60 #define _PAGE_NO_READ_SHIFT 61 #define _PAGE_NO_EXEC_SHIFT 62 #define _PAGE_RPLV_SHIFT 63 /* Used by software */ #define _PAGE_PRESENT (_ULCAST_(1) << _PAGE_PRESENT_SHIFT) +#define _PAGE_PRESENT_INVALID (_ULCAST_(1) << _PAGE_PRESENT_INVALID_SHIFT) #define _PAGE_WRITE (_ULCAST_(1) << _PAGE_WRITE_SHIFT) #define _PAGE_ACCESSED (_ULCAST_(1) << _PAGE_ACCESSED_SHIFT) #define _PAGE_MODIFIED (_ULCAST_(1) << _PAGE_MODIFIED_SHIFT) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index d28fb9dbec59..9a9f9ff9b709 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -213,7 +213,7 @@ static inline int pmd_bad(pmd_t pmd) static inline int pmd_present(pmd_t pmd) { if (unlikely(pmd_val(pmd) & _PAGE_HUGE)) - return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE)); + return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PRESENT_INVALID)); return pmd_val(pmd) != (unsigned long)invalid_pte_table; } @@ -558,6 +558,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) static inline pmd_t pmd_mkinvalid(pmd_t pmd) { + pmd_val(pmd) |= _PAGE_PRESENT_INVALID; pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY | _PAGE_PROTNONE); return pmd; diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index 2406c95b34cc..021b59c248fa 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -396,6 +396,8 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE) alignment_mask = 0x7; + else + alignment_mask = 0x3; offset = hw->address & alignment_mask; hw->address &= ~alignment_mask; diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index ff28f99b47d7..0491bf453cd4 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -271,7 +271,7 @@ static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); /* Make sure interrupt enabled. */ - cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) | (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); @@ -594,7 +594,7 @@ static struct pmu pmu = { static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) { - return (pev->event_id & 0xff); + return M_PERFCTL_EVENT(pev->event_id); } static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) @@ -849,7 +849,7 @@ static void resume_local_counters(void) static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) { - raw_event.event_id = config & 0xff; + raw_event.event_id = M_PERFCTL_EVENT(config); return &raw_event; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 4444b13418f0..78a00359bde3 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -12,6 +12,7 @@ */ #include <linux/init.h> #include <linux/acpi.h> +#include <linux/cpu.h> #include <linux/dmi.h> #include <linux/efi.h> #include <linux/export.h> @@ -37,7 +38,6 @@ #include <asm/addrspace.h> #include <asm/alternative.h> #include <asm/bootinfo.h> -#include <asm/bugs.h> #include <asm/cache.h> #include <asm/cpu.h> #include <asm/dma.h> @@ -87,7 +87,7 @@ const char *get_system_type(void) return "generic-loongson-machine"; } -void __init check_bugs(void) +void __init arch_cpu_finalize_init(void) { alternative_instructions(); } diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index f377e50f3c66..c189e03cd5da 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -190,9 +190,9 @@ static u64 read_const_counter(struct clocksource *clk) return drdtime(); } -static u64 native_sched_clock(void) +static noinstr u64 sched_clock_read(void) { - return read_const_counter(NULL); + return drdtime(); } static struct clocksource clocksource_const = { @@ -211,7 +211,7 @@ int __init constant_clocksource_init(void) res = clocksource_register_hz(&clocksource_const, freq); - sched_clock_register(native_sched_clock, 64, freq); + sched_clock_register(sched_clock_read, 64, freq); pr_info("Constant clock source device register\n"); diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c index bdff825d29ef..85fae3d2d71a 100644 --- a/arch/loongarch/kernel/unaligned.c +++ b/arch/loongarch/kernel/unaligned.c @@ -485,7 +485,7 @@ static int __init debugfs_unaligned(void) struct dentry *d; d = debugfs_create_dir("loongarch", NULL); - if (!d) + if (IS_ERR_OR_NULL(d)) return -ENOMEM; debugfs_create_u32("unaligned_instructions_user", diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 40198a1ebe27..dc792b321f1e 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -4,6 +4,7 @@ config M68K default y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CPU_FINALIZE_INIT if MMU select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index b26469a65bc1..62fdca7efce4 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -43,6 +43,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -454,7 +455,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 944a49a129be..5bfbd0444bb5 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -39,6 +39,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -411,7 +412,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index a32dd884fcce..44302f11c9ea 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -46,6 +46,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -431,7 +432,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 23b7805309bd..f3336f1774ec 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -36,6 +36,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -403,7 +404,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 5605ab5c3dcf..2d1bbac68066 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -38,6 +38,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -413,7 +414,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index d0d1f9c33756..b4428dc36102 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -37,6 +37,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -433,7 +434,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 6d04314ce7ea..4cd9fa4cb10c 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -57,6 +57,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -519,7 +520,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index e6f5ae526d08..7ee9ad50f0ad 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -35,6 +35,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -402,7 +403,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index f2d4dff4787a..2488893616dc 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -36,6 +36,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -403,7 +404,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 907eedecd040..ffc676289f87 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -37,6 +37,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -420,7 +421,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 9e3d47008f21..198179657ce0 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -402,7 +402,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index f6540078cb4b..85364f6178d4 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -33,6 +33,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -401,7 +402,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/virt_defconfig b/arch/m68k/configs/virt_defconfig index 8059bd618370..311b57e73316 100644 --- a/arch/m68k/configs/virt_defconfig +++ b/arch/m68k/configs/virt_defconfig @@ -24,8 +24,6 @@ CONFIG_SUN_PARTITION=y CONFIG_SYSV68_PARTITION=y CONFIG_NET=y CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index cfba83d230fd..4bfbc25f6ecf 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -106,6 +106,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ @@ -115,6 +120,10 @@ ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, eor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -158,12 +167,7 @@ static inline int arch_atomic_inc_and_test(atomic_t *v) } #define arch_atomic_inc_and_test arch_atomic_inc_and_test -#ifdef CONFIG_RMW_INSNS - -#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - -#else /* !CONFIG_RMW_INSNS */ +#ifndef CONFIG_RMW_INSNS static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { @@ -177,6 +181,7 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) local_irq_restore(flags); return prev; } +#define arch_atomic_cmpxchg arch_atomic_cmpxchg static inline int arch_atomic_xchg(atomic_t *v, int new) { @@ -189,6 +194,7 @@ static inline int arch_atomic_xchg(atomic_t *v, int new) local_irq_restore(flags); return prev; } +#define arch_atomic_xchg arch_atomic_xchg #endif /* !CONFIG_RMW_INSNS */ diff --git a/arch/m68k/include/asm/bugs.h b/arch/m68k/include/asm/bugs.h deleted file mode 100644 index 745530651e0b..000000000000 --- a/arch/m68k/include/asm/bugs.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/asm-m68k/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -#ifdef CONFIG_MMU -extern void check_bugs(void); /* in arch/m68k/kernel/setup.c */ -#else -static void check_bugs(void) -{ -} -#endif diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index fbff1cea62ca..6f1ae01f322c 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/cpu.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/delay.h> @@ -504,7 +505,7 @@ static int __init proc_hardware_init(void) module_init(proc_hardware_init); #endif -void check_bugs(void) +void __init arch_cpu_finalize_init(void) { #if defined(CONFIG_FPU) && !defined(CONFIG_M68KFPU_EMU) if (m68k_fputype == 0) { diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index b9f6908a31bc..ba468b5f3f0b 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -858,11 +858,17 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs * } static inline void __user * -get_sigframe(struct ksignal *ksig, size_t frame_size) +get_sigframe(struct ksignal *ksig, struct pt_regs *tregs, size_t frame_size) { unsigned long usp = sigsp(rdusp(), ksig); + unsigned long gap = 0; - return (void __user *)((usp - frame_size) & -8UL); + if (CPU_IS_020_OR_030 && tregs->format == 0xb) { + /* USP is unreliable so use worst-case value */ + gap = 256; + } + + return (void __user *)((usp - gap - frame_size) & -8UL); } static int setup_frame(struct ksignal *ksig, sigset_t *set, @@ -880,7 +886,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, return -EFAULT; } - frame = get_sigframe(ksig, sizeof(*frame) + fsize); + frame = get_sigframe(ksig, tregs, sizeof(*frame) + fsize); if (fsize) err |= copy_to_user (frame + 1, regs + 1, fsize); @@ -952,7 +958,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, return -EFAULT; } - frame = get_sigframe(ksig, sizeof(*frame)); + frame = get_sigframe(ksig, tregs, sizeof(*frame)); if (fsize) err |= copy_to_user (&frame->uc.uc_extra, regs + 1, fsize); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c2f5498d207f..ada18f3be229 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,6 +4,7 @@ config MIPS default y select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER if !CC_IS_CLANG || CLANG_VERSION >= 140000 select ARCH_HAS_DEBUG_VIRTUAL if !64BIT select ARCH_HAS_FORTIFY_SOURCE @@ -79,6 +80,7 @@ config MIPS select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI + select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -2285,6 +2287,7 @@ config MIPS_CPS select MIPS_CM select MIPS_CPS_PM if HOTPLUG_CPU select SMP + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select SYNC_R4K if (CEVT_R4K || CSRC_R4K) select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6 diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c index 5ab043000409..6a3c890f7bbf 100644 --- a/arch/mips/alchemy/common/dbdma.c +++ b/arch/mips/alchemy/common/dbdma.c @@ -30,6 +30,7 @@ * */ +#include <linux/dma-map-ops.h> /* for dma_default_coherent */ #include <linux/init.h> #include <linux/kernel.h> #include <linux/slab.h> @@ -623,17 +624,18 @@ u32 au1xxx_dbdma_put_source(u32 chanid, dma_addr_t buf, int nbytes, u32 flags) dp->dscr_cmd0 &= ~DSCR_CMD0_IE; /* - * There is an errata on the Au1200/Au1550 parts that could result - * in "stale" data being DMA'ed. It has to do with the snoop logic on - * the cache eviction buffer. DMA_NONCOHERENT is on by default for - * these parts. If it is fixed in the future, these dma_cache_inv will - * just be nothing more than empty macros. See io.h. + * There is an erratum on certain Au1200/Au1550 revisions that could + * result in "stale" data being DMA'ed. It has to do with the snoop + * logic on the cache eviction buffer. dma_default_coherent is set + * to false on these parts. */ - dma_cache_wback_inv((unsigned long)buf, nbytes); + if (!dma_default_coherent) + dma_cache_wback_inv(KSEG0ADDR(buf), nbytes); dp->dscr_cmd0 |= DSCR_CMD0_V; /* Let it rip */ wmb(); /* drain writebuffer */ dma_cache_wback_inv((unsigned long)dp, sizeof(*dp)); ctp->chan_ptr->ddma_dbell = 0; + wmb(); /* force doorbell write out to dma engine */ /* Get next descriptor pointer. */ ctp->put_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr)); @@ -685,17 +687,18 @@ u32 au1xxx_dbdma_put_dest(u32 chanid, dma_addr_t buf, int nbytes, u32 flags) dp->dscr_source1, dp->dscr_dest0, dp->dscr_dest1); #endif /* - * There is an errata on the Au1200/Au1550 parts that could result in - * "stale" data being DMA'ed. It has to do with the snoop logic on the - * cache eviction buffer. DMA_NONCOHERENT is on by default for these - * parts. If it is fixed in the future, these dma_cache_inv will just - * be nothing more than empty macros. See io.h. + * There is an erratum on certain Au1200/Au1550 revisions that could + * result in "stale" data being DMA'ed. It has to do with the snoop + * logic on the cache eviction buffer. dma_default_coherent is set + * to false on these parts. */ - dma_cache_inv((unsigned long)buf, nbytes); + if (!dma_default_coherent) + dma_cache_inv(KSEG0ADDR(buf), nbytes); dp->dscr_cmd0 |= DSCR_CMD0_V; /* Let it rip */ wmb(); /* drain writebuffer */ dma_cache_wback_inv((unsigned long)dp, sizeof(*dp)); ctp->chan_ptr->ddma_dbell = 0; + wmb(); /* force doorbell write out to dma engine */ /* Get next descriptor pointer. */ ctp->put_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr)); diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 549a6392a3d2..053805cb741c 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -178,7 +178,10 @@ void __init plat_mem_setup(void) ioport_resource.start = 0; ioport_resource.end = ~0; - /* intended to somewhat resemble ARM; see Documentation/arm/booting.rst */ + /* + * intended to somewhat resemble ARM; see + * Documentation/arch/arm/booting.rst + */ if (fw_arg0 == 0 && fw_arg1 == 0xffffffff) dtb = phys_to_virt(fw_arg2); else diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4212584e6efa..33c09688210f 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -345,6 +345,7 @@ void play_dead(void) int cpu = cpu_number_map(cvmx_get_core_num()); idle_task_exit(); + cpuhp_ap_report_dead(); octeon_processor_boot = 0xff; per_cpu(cpu_state, cpu) = CPU_DEAD; diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 712fb5a6a568..ba188e77768b 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -33,17 +33,6 @@ static __always_inline void arch_##pfx##_set(pfx##_t *v, type i) \ { \ WRITE_ONCE(v->counter, i); \ } \ - \ -static __always_inline type \ -arch_##pfx##_cmpxchg(pfx##_t *v, type o, type n) \ -{ \ - return arch_cmpxchg(&v->counter, o, n); \ -} \ - \ -static __always_inline type arch_##pfx##_xchg(pfx##_t *v, type n) \ -{ \ - return arch_xchg(&v->counter, n); \ -} ATOMIC_OPS(atomic, int) diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h index 653f78f3a685..84be74afcb9a 100644 --- a/arch/mips/include/asm/bugs.h +++ b/arch/mips/include/asm/bugs.h @@ -1,17 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * This is included by init/main.c to check for architecture-dependent bugs. - * * Copyright (C) 2007 Maciej W. Rozycki - * - * Needs: - * void check_bugs(void); */ #ifndef _ASM_BUGS_H #define _ASM_BUGS_H #include <linux/bug.h> -#include <linux/delay.h> #include <linux/smp.h> #include <asm/cpu.h> @@ -24,17 +18,6 @@ extern void check_bugs64_early(void); extern void check_bugs32(void); extern void check_bugs64(void); -static inline void __init check_bugs(void) -{ - unsigned int cpu = smp_processor_id(); - - cpu_data[cpu].udelay_val = loops_per_jiffy; - check_bugs32(); - - if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) - check_bugs64(); -} - static inline int r4k_daddiu_bug(void) { if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) diff --git a/arch/mips/include/asm/mach-loongson32/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h index eb3ddbec1752..d8f9dec0ecc3 100644 --- a/arch/mips/include/asm/mach-loongson32/loongson1.h +++ b/arch/mips/include/asm/mach-loongson32/loongson1.h @@ -47,7 +47,6 @@ #include <regs-clk.h> #include <regs-mux.h> -#include <regs-pwm.h> #include <regs-rtc.h> #include <regs-wdt.h> diff --git a/arch/mips/include/asm/mach-loongson32/regs-pwm.h b/arch/mips/include/asm/mach-loongson32/regs-pwm.h deleted file mode 100644 index ec870c82d492..000000000000 --- a/arch/mips/include/asm/mach-loongson32/regs-pwm.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2014 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 PWM Register Definitions. - */ - -#ifndef __ASM_MACH_LOONGSON32_REGS_PWM_H -#define __ASM_MACH_LOONGSON32_REGS_PWM_H - -/* Loongson 1 PWM Timer Register Definitions */ -#define PWM_CNT 0x0 -#define PWM_HRC 0x4 -#define PWM_LRC 0x8 -#define PWM_CTRL 0xc - -/* PWM Control Register Bits */ -#define CNT_RST BIT(7) -#define INT_SR BIT(6) -#define INT_EN BIT(5) -#define PWM_SINGLE BIT(4) -#define PWM_OE BIT(3) -#define CNT_EN BIT(0) - -#endif /* __ASM_MACH_LOONGSON32_REGS_PWM_H */ diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 0145bbfb5efb..5719ff49eff1 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -33,6 +33,7 @@ struct plat_smp_ops { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); + void (*cleanup_dead_cpu)(unsigned cpu); #endif #ifdef CONFIG_KEXEC void (*kexec_nonboot_cpu)(void); diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 6d15a398d389..e79adcb128e6 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1502,6 +1502,10 @@ static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) break; } break; + case PRID_IMP_NETLOGIC_AU13XX: + c->cputype = CPU_ALCHEMY; + __cpu_name[cpu] = "Au1300"; + break; } } @@ -1863,6 +1867,7 @@ void cpu_probe(void) cpu_probe_mips(c, cpu); break; case PRID_COMP_ALCHEMY: + case PRID_COMP_NETLOGIC: cpu_probe_alchemy(c, cpu); break; case PRID_COMP_SIBYTE: diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index febdc5564638..cb871eb784a7 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -11,6 +11,8 @@ * Copyright (C) 2000, 2001, 2002, 2007 Maciej W. Rozycki */ #include <linux/init.h> +#include <linux/cpu.h> +#include <linux/delay.h> #include <linux/ioport.h> #include <linux/export.h> #include <linux/screen_info.h> @@ -158,10 +160,6 @@ static unsigned long __init init_initrd(void) pr_err("initrd start must be page aligned\n"); goto disable; } - if (initrd_start < PAGE_OFFSET) { - pr_err("initrd start < PAGE_OFFSET\n"); - goto disable; - } /* * Sanitize initrd addresses. For example firmware @@ -174,6 +172,11 @@ static unsigned long __init init_initrd(void) initrd_end = (unsigned long)__va(end); initrd_start = (unsigned long)__va(__pa(initrd_start)); + if (initrd_start < PAGE_OFFSET) { + pr_err("initrd start < PAGE_OFFSET\n"); + goto disable; + } + ROOT_DEV = Root_RAM0; return PFN_UP(end); disable: @@ -840,3 +843,14 @@ static int __init setnocoherentio(char *str) } early_param("nocoherentio", setnocoherentio); #endif + +void __init arch_cpu_finalize_init(void) +{ + unsigned int cpu = smp_processor_id(); + + cpu_data[cpu].udelay_val = loops_per_jiffy; + check_bugs32(); + + if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + check_bugs64(); +} diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 15466d4cf4a0..c074ecce3fbf 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -392,6 +392,7 @@ static void bmips_cpu_die(unsigned int cpu) void __ref play_dead(void) { idle_task_exit(); + cpuhp_ap_report_dead(); /* flush data cache */ _dma_cache_wback_inv(0, ~0); diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 62f677b2306f..d7fdbec232da 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -503,8 +503,7 @@ void play_dead(void) } } - /* This CPU has chosen its way out */ - (void)cpu_report_death(); + cpuhp_ap_report_dead(); cps_shutdown_this_cpu(cpu_death); @@ -527,7 +526,9 @@ static void wait_for_sibling_halt(void *ptr_cpu) } while (!(halted & TCHALT_H)); } -static void cps_cpu_die(unsigned int cpu) +static void cps_cpu_die(unsigned int cpu) { } + +static void cps_cleanup_dead_cpu(unsigned cpu) { unsigned core = cpu_core(&cpu_data[cpu]); unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]); @@ -535,12 +536,6 @@ static void cps_cpu_die(unsigned int cpu) unsigned stat; int err; - /* Wait for the cpu to choose its way out */ - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU%u: didn't offline\n", cpu); - return; - } - /* * Now wait for the CPU to actually offline. Without doing this that * offlining may race with one or more of: @@ -624,6 +619,7 @@ static const struct plat_smp_ops cps_smp_ops = { #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cps_cpu_disable, .cpu_die = cps_cpu_die, + .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif #ifdef CONFIG_KEXEC .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 1d93b85271ba..90c71d800b59 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -690,6 +690,14 @@ void flush_tlb_one(unsigned long vaddr) EXPORT_SYMBOL(flush_tlb_page); EXPORT_SYMBOL(flush_tlb_one); +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ + if (mp_ops->cleanup_dead_cpu) + mp_ops->cleanup_dead_cpu(cpu); +} +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static void tick_broadcast_callee(void *info) diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig index 2ef9da0016df..a7c500959577 100644 --- a/arch/mips/loongson32/Kconfig +++ b/arch/mips/loongson32/Kconfig @@ -35,41 +35,4 @@ config LOONGSON1_LS1C select COMMON_CLK endchoice -menuconfig CEVT_CSRC_LS1X - bool "Use PWM Timer for clockevent/clocksource" - select MIPS_EXTERNAL_TIMER - depends on CPU_LOONGSON32 - help - This option changes the default clockevent/clocksource to PWM Timer, - and is required by Loongson1 CPUFreq support. - - If unsure, say N. - -choice - prompt "Select clockevent/clocksource" - depends on CEVT_CSRC_LS1X - default TIMER_USE_PWM0 - -config TIMER_USE_PWM0 - bool "Use PWM Timer 0" - help - Use PWM Timer 0 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM1 - bool "Use PWM Timer 1" - help - Use PWM Timer 1 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM2 - bool "Use PWM Timer 2" - help - Use PWM Timer 2 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM3 - bool "Use PWM Timer 3" - help - Use PWM Timer 3 as the default clockevent/clocksourcer. - -endchoice - endif # MACH_LOONGSON32 diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c index 965c04aa56fd..74ad2b17918d 100644 --- a/arch/mips/loongson32/common/time.c +++ b/arch/mips/loongson32/common/time.c @@ -5,208 +5,8 @@ #include <linux/clk.h> #include <linux/of_clk.h> -#include <linux/interrupt.h> -#include <linux/sizes.h> #include <asm/time.h> -#include <loongson1.h> -#include <platform.h> - -#ifdef CONFIG_CEVT_CSRC_LS1X - -#if defined(CONFIG_TIMER_USE_PWM1) -#define LS1X_TIMER_BASE LS1X_PWM1_BASE -#define LS1X_TIMER_IRQ LS1X_PWM1_IRQ - -#elif defined(CONFIG_TIMER_USE_PWM2) -#define LS1X_TIMER_BASE LS1X_PWM2_BASE -#define LS1X_TIMER_IRQ LS1X_PWM2_IRQ - -#elif defined(CONFIG_TIMER_USE_PWM3) -#define LS1X_TIMER_BASE LS1X_PWM3_BASE -#define LS1X_TIMER_IRQ LS1X_PWM3_IRQ - -#else -#define LS1X_TIMER_BASE LS1X_PWM0_BASE -#define LS1X_TIMER_IRQ LS1X_PWM0_IRQ -#endif - -DEFINE_RAW_SPINLOCK(ls1x_timer_lock); - -static void __iomem *timer_reg_base; -static uint32_t ls1x_jiffies_per_tick; - -static inline void ls1x_pwmtimer_set_period(uint32_t period) -{ - __raw_writel(period, timer_reg_base + PWM_HRC); - __raw_writel(period, timer_reg_base + PWM_LRC); -} - -static inline void ls1x_pwmtimer_restart(void) -{ - __raw_writel(0x0, timer_reg_base + PWM_CNT); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); -} - -void __init ls1x_pwmtimer_init(void) -{ - timer_reg_base = ioremap(LS1X_TIMER_BASE, SZ_16); - if (!timer_reg_base) - panic("Failed to remap timer registers"); - - ls1x_jiffies_per_tick = DIV_ROUND_CLOSEST(mips_hpt_frequency, HZ); - - ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick); - ls1x_pwmtimer_restart(); -} - -static u64 ls1x_clocksource_read(struct clocksource *cs) -{ - unsigned long flags; - int count; - u32 jifs; - static int old_count; - static u32 old_jifs; - - raw_spin_lock_irqsave(&ls1x_timer_lock, flags); - /* - * Although our caller may have the read side of xtime_lock, - * this is now a seqlock, and we are cheating in this routine - * by having side effects on state that we cannot undo if - * there is a collision on the seqlock and our caller has to - * retry. (Namely, old_jifs and old_count.) So we must treat - * jiffies as volatile despite the lock. We read jiffies - * before latching the timer count to guarantee that although - * the jiffies value might be older than the count (that is, - * the counter may underflow between the last point where - * jiffies was incremented and the point where we latch the - * count), it cannot be newer. - */ - jifs = jiffies; - /* read the count */ - count = __raw_readl(timer_reg_base + PWM_CNT); - - /* - * It's possible for count to appear to go the wrong way for this - * reason: - * - * The timer counter underflows, but we haven't handled the resulting - * interrupt and incremented jiffies yet. - * - * Previous attempts to handle these cases intelligently were buggy, so - * we just do the simple thing now. - */ - if (count < old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_spin_unlock_irqrestore(&ls1x_timer_lock, flags); - - return (u64) (jifs * ls1x_jiffies_per_tick) + count; -} - -static struct clocksource ls1x_clocksource = { - .name = "ls1x-pwmtimer", - .read = ls1x_clocksource_read, - .mask = CLOCKSOURCE_MASK(24), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static irqreturn_t ls1x_clockevent_isr(int irq, void *devid) -{ - struct clock_event_device *cd = devid; - - ls1x_pwmtimer_restart(); - cd->event_handler(cd); - - return IRQ_HANDLED; -} - -static int ls1x_clockevent_set_state_periodic(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick); - ls1x_pwmtimer_restart(); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_tick_resume(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_set_state_shutdown(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - __raw_writel(__raw_readl(timer_reg_base + PWM_CTRL) & ~CNT_EN, - timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_set_next(unsigned long evt, - struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - ls1x_pwmtimer_set_period(evt); - ls1x_pwmtimer_restart(); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static struct clock_event_device ls1x_clockevent = { - .name = "ls1x-pwmtimer", - .features = CLOCK_EVT_FEAT_PERIODIC, - .rating = 300, - .irq = LS1X_TIMER_IRQ, - .set_next_event = ls1x_clockevent_set_next, - .set_state_shutdown = ls1x_clockevent_set_state_shutdown, - .set_state_periodic = ls1x_clockevent_set_state_periodic, - .set_state_oneshot = ls1x_clockevent_set_state_shutdown, - .tick_resume = ls1x_clockevent_tick_resume, -}; - -static void __init ls1x_time_init(void) -{ - struct clock_event_device *cd = &ls1x_clockevent; - int ret; - - if (!mips_hpt_frequency) - panic("Invalid timer clock rate"); - - ls1x_pwmtimer_init(); - - clockevent_set_clock(cd, mips_hpt_frequency); - cd->max_delta_ns = clockevent_delta2ns(0xffffff, cd); - cd->max_delta_ticks = 0xffffff; - cd->min_delta_ns = clockevent_delta2ns(0x000300, cd); - cd->min_delta_ticks = 0x000300; - cd->cpumask = cpumask_of(smp_processor_id()); - clockevents_register_device(cd); - - ls1x_clocksource.rating = 200 + mips_hpt_frequency / 10000000; - ret = clocksource_register_hz(&ls1x_clocksource, mips_hpt_frequency); - if (ret) - panic(KERN_ERR "Failed to register clocksource: %d\n", ret); - - if (request_irq(LS1X_TIMER_IRQ, ls1x_clockevent_isr, - IRQF_PERCPU | IRQF_TIMER, "ls1x-pwmtimer", - &ls1x_clockevent)) - pr_err("Failed to register ls1x-pwmtimer interrupt\n"); -} -#endif /* CONFIG_CEVT_CSRC_LS1X */ - void __init plat_time_init(void) { struct clk *clk = NULL; @@ -214,20 +14,10 @@ void __init plat_time_init(void) /* initialize LS1X clocks */ of_clk_init(NULL); -#ifdef CONFIG_CEVT_CSRC_LS1X - /* setup LS1X PWM timer */ - clk = clk_get(NULL, "ls1x-pwmtimer"); - if (IS_ERR(clk)) - panic("unable to get timer clock, err=%ld", PTR_ERR(clk)); - - mips_hpt_frequency = clk_get_rate(clk); - ls1x_time_init(); -#else /* setup mips r4k timer */ clk = clk_get(NULL, "cpu_clk"); if (IS_ERR(clk)) panic("unable to get cpu clock, err=%ld", PTR_ERR(clk)); mips_hpt_frequency = clk_get_rate(clk) / 2; -#endif /* CONFIG_CEVT_CSRC_LS1X */ } diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index b0e8bb9fa036..cdecd7af11a6 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -775,6 +775,7 @@ void play_dead(void) void (*play_dead_at_ckseg1)(int *); idle_task_exit(); + cpuhp_ap_report_dead(); prid_imp = read_c0_prid() & PRID_IMP_MASK; prid_rev = read_c0_prid() & PRID_REV_MASK; diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts index 56339bef3247..0e7e5b0dd685 100644 --- a/arch/nios2/boot/dts/10m50_devboard.dts +++ b/arch/nios2/boot/dts/10m50_devboard.dts @@ -97,7 +97,7 @@ rx-fifo-depth = <8192>; tx-fifo-depth = <8192>; address-bits = <48>; - max-frame-size = <1518>; + max-frame-size = <1500>; local-mac-address = [00 00 00 00 00 00]; altr,has-supplementary-unicast; altr,enable-sup-addr = <1>; diff --git a/arch/nios2/boot/dts/3c120_devboard.dts b/arch/nios2/boot/dts/3c120_devboard.dts index d10fb81686c7..3ee316906379 100644 --- a/arch/nios2/boot/dts/3c120_devboard.dts +++ b/arch/nios2/boot/dts/3c120_devboard.dts @@ -106,7 +106,7 @@ interrupt-names = "rx_irq", "tx_irq"; rx-fifo-depth = <8192>; tx-fifo-depth = <8192>; - max-frame-size = <1518>; + max-frame-size = <1500>; local-mac-address = [ 00 00 00 00 00 00 ]; phy-mode = "rgmii-id"; phy-handle = <&phy0>; diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c index 203870c4b86d..338849c430a5 100644 --- a/arch/nios2/kernel/cpuinfo.c +++ b/arch/nios2/kernel/cpuinfo.c @@ -47,7 +47,7 @@ void __init setup_cpuinfo(void) str = of_get_property(cpu, "altr,implementation", &len); if (str) - strlcpy(cpuinfo.cpu_impl, str, sizeof(cpuinfo.cpu_impl)); + strscpy(cpuinfo.cpu_impl, str, sizeof(cpuinfo.cpu_impl)); else strcpy(cpuinfo.cpu_impl, "<unknown>"); diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 40bc8fb75e0b..8582ed965844 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -121,7 +121,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, dtb_passed = r6; if (r7) - strlcpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE); + strscpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE); } #endif @@ -129,10 +129,10 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, #ifndef CONFIG_CMDLINE_FORCE if (cmdline_passed[0]) - strlcpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE); + strscpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE); #ifdef CONFIG_NIOS2_CMDLINE_IGNORE_DTB else - strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); #endif #endif diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h index 326167e4783a..8ce67ec7c9a3 100644 --- a/arch/openrisc/include/asm/atomic.h +++ b/arch/openrisc/include/asm/atomic.h @@ -130,7 +130,4 @@ static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) #include <asm/cmpxchg.h> -#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (v))) -#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (old), (new))) - #endif /* __ASM_OPENRISC_ATOMIC_H */ diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 466a25525364..c0b4b1c253d1 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -57,6 +57,7 @@ config PARISC select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_REGS_AND_STACK_ACCESS_API + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select GENERIC_SCHED_CLOCK select GENERIC_IRQ_MIGRATION if SMP select HAVE_UNSTABLE_SCHED_CLOCK if SMP @@ -130,6 +131,10 @@ config PM config STACKTRACE_SUPPORT def_bool y +config LOCKDEP_SUPPORT + bool + default y + config ISA_DMA_API bool diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index f66554cd5c45..3a059cb5e112 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -1 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 +# +config LIGHTWEIGHT_SPINLOCK_CHECK + bool "Enable lightweight spinlock checks" + depends on SMP && !DEBUG_SPINLOCK + default y + help + Add checks with low performance impact to the spinlock functions + to catch memory overwrites at runtime. For more advanced + spinlock debugging you should choose the DEBUG_SPINLOCK option + which will detect unitialized spinlocks too. + If unsure say Y here. diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 0f0d4a496fef..75677b526b2b 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -90,10 +90,6 @@ #include <asm/asmregs.h> #include <asm/psw.h> - sp = 30 - gp = 27 - ipsw = 22 - /* * We provide two versions of each macro to convert from physical * to virtual and vice versa. The "_r1" versions take one argument diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index dd5a299ada69..d4f023887ff8 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -73,10 +73,6 @@ static __inline__ int arch_atomic_read(const atomic_t *v) return READ_ONCE((v)->counter); } -/* exported interface */ -#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #define ATOMIC_OP(op, c_op) \ static __inline__ void arch_atomic_##op(int i, atomic_t *v) \ { \ @@ -122,6 +118,11 @@ static __inline__ int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op) \ ATOMIC_OP(op, c_op) \ @@ -131,6 +132,10 @@ ATOMIC_OPS(and, &=) ATOMIC_OPS(or, |=) ATOMIC_OPS(xor, ^=) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -185,6 +190,11 @@ static __inline__ s64 arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) +#define arch_atomic64_add_return arch_atomic64_add_return +#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_fetch_add arch_atomic64_fetch_add +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub + #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, c_op) \ ATOMIC64_OP(op, c_op) \ @@ -194,6 +204,10 @@ ATOMIC64_OPS(and, &=) ATOMIC64_OPS(or, |=) ATOMIC64_OPS(xor, ^=) +#define arch_atomic64_fetch_and arch_atomic64_fetch_and +#define arch_atomic64_fetch_or arch_atomic64_fetch_or +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor + #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN @@ -218,11 +232,6 @@ arch_atomic64_read(const atomic64_t *v) return READ_ONCE((v)->counter); } -/* exported interface */ -#define arch_atomic64_cmpxchg(v, o, n) \ - ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #endif /* !CONFIG_64BIT */ diff --git a/arch/parisc/include/asm/bugs.h b/arch/parisc/include/asm/bugs.h deleted file mode 100644 index 0a7f9db6bd1c..000000000000 --- a/arch/parisc/include/asm/bugs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/asm-parisc/bugs.h - * - * Copyright (C) 1999 Mike Shaver - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -#include <asm/processor.h> - -static inline void check_bugs(void) -{ -// identify_cpu(&boot_cpu_data); -} diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 0bdee6724132..c8b6928cee1e 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -48,6 +48,10 @@ void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) +#define flush_dcache_mmap_lock_irqsave(mapping, flags) \ + xa_lock_irqsave(&mapping->i_pages, flags) +#define flush_dcache_mmap_unlock_irqrestore(mapping, flags) \ + xa_unlock_irqrestore(&mapping->i_pages, flags) #define flush_icache_page(vma,page) do { \ flush_kernel_dcache_page_addr(page_address(page)); \ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index e715df5385d6..5656395c95ee 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -472,9 +472,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, #define pte_same(A,B) (pte_val(A) == pte_val(B)) -struct seq_file; -extern void arch_report_meminfo(struct seq_file *m); - #endif /* !__ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index a6e5d66a7656..edfcb9858bcb 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -7,10 +7,26 @@ #include <asm/processor.h> #include <asm/spinlock_types.h> +#define SPINLOCK_BREAK_INSN 0x0000c006 /* break 6,6 */ + +static inline void arch_spin_val_check(int lock_val) +{ + if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK)) + asm volatile( "andcm,= %0,%1,%%r0\n" + ".word %2\n" + : : "r" (lock_val), "r" (__ARCH_SPIN_LOCK_UNLOCKED_VAL), + "i" (SPINLOCK_BREAK_INSN)); +} + static inline int arch_spin_is_locked(arch_spinlock_t *x) { - volatile unsigned int *a = __ldcw_align(x); - return READ_ONCE(*a) == 0; + volatile unsigned int *a; + int lock_val; + + a = __ldcw_align(x); + lock_val = READ_ONCE(*a); + arch_spin_val_check(lock_val); + return (lock_val == 0); } static inline void arch_spin_lock(arch_spinlock_t *x) @@ -18,9 +34,18 @@ static inline void arch_spin_lock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); - while (__ldcw(a) == 0) + do { + int lock_val_old; + + lock_val_old = __ldcw(a); + arch_spin_val_check(lock_val_old); + if (lock_val_old) + return; /* got lock */ + + /* wait until we should try to get lock again */ while (*a == 0) continue; + } while (1); } static inline void arch_spin_unlock(arch_spinlock_t *x) @@ -29,15 +54,19 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) a = __ldcw_align(x); /* Release with ordered store. */ - __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory"); + __asm__ __volatile__("stw,ma %0,0(%1)" + : : "r"(__ARCH_SPIN_LOCK_UNLOCKED_VAL), "r"(a) : "memory"); } static inline int arch_spin_trylock(arch_spinlock_t *x) { volatile unsigned int *a; + int lock_val; a = __ldcw_align(x); - return __ldcw(a) != 0; + lock_val = __ldcw(a); + arch_spin_val_check(lock_val); + return lock_val != 0; } /* diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index ca39ee350c3f..d65934079ebd 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -2,13 +2,17 @@ #ifndef __ASM_SPINLOCK_TYPES_H #define __ASM_SPINLOCK_TYPES_H +#define __ARCH_SPIN_LOCK_UNLOCKED_VAL 0x1a46 + typedef struct { #ifdef CONFIG_PA20 volatile unsigned int slock; -# define __ARCH_SPIN_LOCK_UNLOCKED { 1 } +# define __ARCH_SPIN_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED_VAL } #else volatile unsigned int lock[4]; -# define __ARCH_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } } +# define __ARCH_SPIN_LOCK_UNLOCKED \ + { { __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL, \ + __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL } } #endif } arch_spinlock_t; diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c index 66f5672c70bd..25c4d6c3375d 100644 --- a/arch/parisc/kernel/alternative.c +++ b/arch/parisc/kernel/alternative.c @@ -25,7 +25,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start, { struct alt_instr *entry; int index = 0, applied = 0; - int num_cpus = num_online_cpus(); + int num_cpus = num_present_cpus(); u16 cond_check; cond_check = ALT_COND_ALWAYS | diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 1d3b8bc8a623..ca4a302d4365 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -399,6 +399,7 @@ void flush_dcache_page(struct page *page) unsigned long offset; unsigned long addr, old_addr = 0; unsigned long count = 0; + unsigned long flags; pgoff_t pgoff; if (mapping && !mapping_mapped(mapping)) { @@ -420,7 +421,7 @@ void flush_dcache_page(struct page *page) * to flush one address here for them all to become coherent * on machines that support equivalent aliasing */ - flush_dcache_mmap_lock(mapping); + flush_dcache_mmap_lock_irqsave(mapping, flags); vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; addr = mpnt->vm_start + offset; @@ -460,7 +461,7 @@ void flush_dcache_page(struct page *page) } WARN_ON(++count == 4096); } - flush_dcache_mmap_unlock(mapping); + flush_dcache_mmap_unlock_irqrestore(mapping, flags); } EXPORT_SYMBOL(flush_dcache_page); diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index ba87f791323b..71ed5391f29d 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -446,11 +446,27 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { + /* + * fdc: The data cache line is written back to memory, if and only if + * it is dirty, and then invalidated from the data cache. + */ flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size); } void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size); + unsigned long addr = (unsigned long) phys_to_virt(paddr); + + switch (dir) { + case DMA_TO_DEVICE: + case DMA_BIDIRECTIONAL: + flush_kernel_dcache_range(addr, size); + return; + case DMA_FROM_DEVICE: + purge_kernel_dcache_range_asm(addr, addr + size); + return; + default: + BUG(); + } } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 97c6f875bd0e..abdbf038d643 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -122,13 +122,18 @@ void machine_power_off(void) /* It seems we have no way to power the system off via * software. The user has to press the button himself. */ - printk(KERN_EMERG "System shut down completed.\n" - "Please power this system off now."); + printk("Power off or press RETURN to reboot.\n"); /* prevent soft lockup/stalled CPU messages for endless loop. */ rcu_sysrq_start(); lockup_detector_soft_poweroff(); - for (;;); + while (1) { + /* reboot if user presses RETURN key */ + if (pdc_iodc_getc() == 13) { + printk("Rebooting...\n"); + machine_restart(NULL); + } + } } void (*pm_power_off)(void); @@ -166,8 +171,8 @@ void __noreturn arch_cpu_idle_dead(void) local_irq_disable(); - /* Tell __cpu_die() that this CPU is now safe to dispose of. */ - (void)cpu_report_death(); + /* Tell the core that this CPU is now safe to dispose of. */ + cpuhp_ap_report_dead(); /* Ensure that the cache lines are written out. */ flush_cache_all_local(); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index b7fc859fa87d..6b6eaa485946 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -500,11 +500,10 @@ int __cpu_disable(void) void __cpu_die(unsigned int cpu) { pdc_cpu_rendezvous_lock(); +} - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: cpu didn't die\n", cpu); - return; - } +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ pr_info("CPU%u: is shutting down\n", cpu); /* set task's state to interruptible sleep */ diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index f9696fbf646c..304eebd1c83e 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -47,6 +47,10 @@ #include <linux/kgdb.h> #include <linux/kprobes.h> +#if defined(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK) +#include <asm/spinlock.h> +#endif + #include "../math-emu/math-emu.h" /* for handle_fpe() */ static void parisc_show_stack(struct task_struct *task, @@ -291,24 +295,30 @@ static void handle_break(struct pt_regs *regs) } #ifdef CONFIG_KPROBES - if (unlikely(iir == PARISC_KPROBES_BREAK_INSN)) { + if (unlikely(iir == PARISC_KPROBES_BREAK_INSN && !user_mode(regs))) { parisc_kprobe_break_handler(regs); return; } - if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2)) { + if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2 && !user_mode(regs))) { parisc_kprobe_ss_handler(regs); return; } #endif #ifdef CONFIG_KGDB - if (unlikely(iir == PARISC_KGDB_COMPILED_BREAK_INSN || - iir == PARISC_KGDB_BREAK_INSN)) { + if (unlikely((iir == PARISC_KGDB_COMPILED_BREAK_INSN || + iir == PARISC_KGDB_BREAK_INSN)) && !user_mode(regs)) { kgdb_handle_exception(9, SIGTRAP, 0, regs); return; } #endif +#ifdef CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK + if ((iir == SPINLOCK_BREAK_INSN) && !user_mode(regs)) { + die_if_kernel("Spinlock was trashed", regs, 1); + } +#endif + if (unlikely(iir != GDB_BREAK_INSN)) parisc_printk_ratelimited(0, regs, KERN_DEBUG "break %d,%d: pid=%d command='%s'\n", diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 539d1f03ff42..bff5820b7cda 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -906,11 +906,17 @@ config DATA_SHIFT config ARCH_FORCE_MAX_ORDER int "Order of maximal physically contiguous allocations" + range 7 8 if PPC64 && PPC_64K_PAGES default "8" if PPC64 && PPC_64K_PAGES + range 12 12 if PPC64 && !PPC_64K_PAGES default "12" if PPC64 && !PPC_64K_PAGES + range 8 10 if PPC32 && PPC_16K_PAGES default "8" if PPC32 && PPC_16K_PAGES + range 6 10 if PPC32 && PPC_64K_PAGES default "6" if PPC32 && PPC_64K_PAGES + range 4 10 if PPC32 && PPC_256K_PAGES default "4" if PPC32 && PPC_256K_PAGES + range 10 10 default "10" help The kernel page allocator limits the size of maximal physically diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 85cde5bf04b7..771b79423bbc 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -34,8 +34,6 @@ endif BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ - $(call cc-option,-mno-prefixed) $(call cc-option,-mno-pcrel) \ - $(call cc-option,-mno-mma) \ $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ $(LINUXINCLUDE) @@ -71,6 +69,10 @@ BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc BOOTARFLAGS := -crD +BOOTCFLAGS += $(call cc-option,-mno-prefixed) \ + $(call cc-option,-mno-pcrel) \ + $(call cc-option,-mno-mma) + ifdef CONFIG_CC_IS_CLANG BOOTCFLAGS += $(CLANG_FLAGS) BOOTAFLAGS += $(CLANG_FLAGS) diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 7113f9355165..ad1872518992 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -96,7 +96,7 @@ config CRYPTO_AES_PPC_SPE config CRYPTO_AES_GCM_P10 tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)" - depends on PPC64 && CPU_LITTLE_ENDIAN + depends on PPC64 && CPU_LITTLE_ENDIAN && VSX select CRYPTO_LIB_AES select CRYPTO_ALGAPI select CRYPTO_AEAD diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 05c7486f42c5..7b4f516abec1 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -22,15 +22,15 @@ sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o -aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp8-ppc.o aesp8-ppc.o +aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@ -targets += aesp8-ppc.S ghashp8-ppc.S +targets += aesp10-ppc.S ghashp10-ppc.S -$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE +$(obj)/aesp10-ppc.S $(obj)/ghashp10-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE $(call if_changed,perl) -OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y -OBJECT_FILES_NON_STANDARD_ghashp8-ppc.o := y +OBJECT_FILES_NON_STANDARD_aesp10-ppc.o := y +OBJECT_FILES_NON_STANDARD_ghashp10-ppc.o := y diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c index bd3475f5348d..4b6e899895e7 100644 --- a/arch/powerpc/crypto/aes-gcm-p10-glue.c +++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c @@ -30,15 +30,15 @@ MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("aes"); -asmlinkage int aes_p8_set_encrypt_key(const u8 *userKey, const int bits, +asmlinkage int aes_p10_set_encrypt_key(const u8 *userKey, const int bits, void *key); -asmlinkage void aes_p8_encrypt(const u8 *in, u8 *out, const void *key); +asmlinkage void aes_p10_encrypt(const u8 *in, u8 *out, const void *key); asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len, void *rkey, u8 *iv, void *Xi); asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len, void *rkey, u8 *iv, void *Xi); asmlinkage void gcm_init_htable(unsigned char htable[256], unsigned char Xi[16]); -asmlinkage void gcm_ghash_p8(unsigned char *Xi, unsigned char *Htable, +asmlinkage void gcm_ghash_p10(unsigned char *Xi, unsigned char *Htable, unsigned char *aad, unsigned int alen); struct aes_key { @@ -93,7 +93,7 @@ static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash, gctx->aadLen = alen; i = alen & ~0xf; if (i) { - gcm_ghash_p8(nXi, hash->Htable+32, aad, i); + gcm_ghash_p10(nXi, hash->Htable+32, aad, i); aad += i; alen -= i; } @@ -102,7 +102,7 @@ static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash, nXi[i] ^= aad[i]; memset(gctx->aad_hash, 0, 16); - gcm_ghash_p8(gctx->aad_hash, hash->Htable+32, nXi, 16); + gcm_ghash_p10(gctx->aad_hash, hash->Htable+32, nXi, 16); } else { memcpy(gctx->aad_hash, nXi, 16); } @@ -115,7 +115,7 @@ static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey, { __be32 counter = cpu_to_be32(1); - aes_p8_encrypt(hash->H, hash->H, rdkey); + aes_p10_encrypt(hash->H, hash->H, rdkey); set_subkey(hash->H); gcm_init_htable(hash->Htable+32, hash->H); @@ -126,7 +126,7 @@ static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey, /* * Encrypt counter vector as iv tag and increment counter. */ - aes_p8_encrypt(iv, gctx->ivtag, rdkey); + aes_p10_encrypt(iv, gctx->ivtag, rdkey); counter = cpu_to_be32(2); *((__be32 *)(iv+12)) = counter; @@ -160,7 +160,7 @@ static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len) /* * hash (AAD len and len) */ - gcm_ghash_p8(hash->Htable, hash->Htable+32, aclen, 16); + gcm_ghash_p10(hash->Htable, hash->Htable+32, aclen, 16); for (i = 0; i < 16; i++) hash->Htable[i] ^= gctx->ivtag[i]; @@ -192,7 +192,7 @@ static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, int ret; vsx_begin(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + ret = aes_p10_set_encrypt_key(key, keylen * 8, &ctx->enc_key); vsx_end(); return ret ? -EINVAL : 0; diff --git a/arch/powerpc/crypto/aesp8-ppc.pl b/arch/powerpc/crypto/aesp10-ppc.pl index 1f22aec27d79..2c06ce2a2c7c 100644 --- a/arch/powerpc/crypto/aesp8-ppc.pl +++ b/arch/powerpc/crypto/aesp10-ppc.pl @@ -110,7 +110,7 @@ die "can't locate ppc-xlate.pl"; open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; $FRAME=8*$SIZE_T; -$prefix="aes_p8"; +$prefix="aes_p10"; $sp="r1"; $vrsave="r12"; diff --git a/arch/powerpc/crypto/ghashp8-ppc.pl b/arch/powerpc/crypto/ghashp10-ppc.pl index b56603b4a893..27a6b0bec645 100644 --- a/arch/powerpc/crypto/ghashp8-ppc.pl +++ b/arch/powerpc/crypto/ghashp10-ppc.pl @@ -64,7 +64,7 @@ $code=<<___; .text -.globl .gcm_init_p8 +.globl .gcm_init_p10 lis r0,0xfff0 li r8,0x10 mfspr $vrsave,256 @@ -110,7 +110,7 @@ $code=<<___; .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 -.size .gcm_init_p8,.-.gcm_init_p8 +.size .gcm_init_p10,.-.gcm_init_p10 .globl .gcm_init_htable lis r0,0xfff0 @@ -237,7 +237,7 @@ $code=<<___; .long 0 .size .gcm_init_htable,.-.gcm_init_htable -.globl .gcm_gmult_p8 +.globl .gcm_gmult_p10 lis r0,0xfff8 li r8,0x10 mfspr $vrsave,256 @@ -283,9 +283,9 @@ $code=<<___; .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 -.size .gcm_gmult_p8,.-.gcm_gmult_p8 +.size .gcm_gmult_p10,.-.gcm_gmult_p10 -.globl .gcm_ghash_p8 +.globl .gcm_ghash_p10 lis r0,0xfff8 li r8,0x10 mfspr $vrsave,256 @@ -350,7 +350,7 @@ Loop: .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 -.size .gcm_ghash_p8,.-.gcm_ghash_p8 +.size .gcm_ghash_p10,.-.gcm_ghash_p10 .asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" .align 2 diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 47228b177478..5bf6a4d49268 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -126,18 +126,6 @@ ATOMIC_OPS(xor, xor, "", K) #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP -#define arch_atomic_cmpxchg(v, o, n) \ - (arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_cmpxchg_relaxed(v, o, n) \ - arch_cmpxchg_relaxed(&((v)->counter), (o), (n)) -#define arch_atomic_cmpxchg_acquire(v, o, n) \ - arch_cmpxchg_acquire(&((v)->counter), (o), (n)) - -#define arch_atomic_xchg(v, new) \ - (arch_xchg(&((v)->counter), new)) -#define arch_atomic_xchg_relaxed(v, new) \ - arch_xchg_relaxed(&((v)->counter), (new)) - /** * atomic_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic_t @@ -396,18 +384,6 @@ static __inline__ s64 arch_atomic64_dec_if_positive(atomic64_t *v) } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive -#define arch_atomic64_cmpxchg(v, o, n) \ - (arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic64_cmpxchg_relaxed(v, o, n) \ - arch_cmpxchg_relaxed(&((v)->counter), (o), (n)) -#define arch_atomic64_cmpxchg_acquire(v, o, n) \ - arch_cmpxchg_acquire(&((v)->counter), (o), (n)) - -#define arch_atomic64_xchg(v, new) \ - (arch_xchg(&((v)->counter), new)) -#define arch_atomic64_xchg_relaxed(v, new) \ - arch_xchg_relaxed(&((v)->counter), (new)) - /** * atomic64_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic64_t diff --git a/arch/powerpc/include/asm/bugs.h b/arch/powerpc/include/asm/bugs.h deleted file mode 100644 index 01b8f6ca4dbb..000000000000 --- a/arch/powerpc/include/asm/bugs.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_BUGS_H -#define _ASM_POWERPC_BUGS_H - -/* - */ - -/* - * This file is included by 'init/main.c' to check for - * architecture-dependent bugs. - */ - -static inline void check_bugs(void) { } - -#endif /* _ASM_POWERPC_BUGS_H */ diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 678b5bdc79b1..34e14dfd8e04 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -205,7 +205,6 @@ extern void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num); extern int iommu_add_device(struct iommu_table_group *table_group, struct device *dev); -extern void iommu_del_device(struct device *dev); extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction); @@ -229,10 +228,6 @@ static inline int iommu_add_device(struct iommu_table_group *table_group, { return 0; } - -static inline void iommu_del_device(struct device *dev) -{ -} #endif /* !CONFIG_IOMMU_API */ u64 dma_iommu_get_required_mask(struct device *dev); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 9972626ddaf6..6a88bfdaa69b 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -165,9 +165,6 @@ static inline bool is_ioremap_addr(const void *x) return addr >= IOREMAP_BASE && addr < IOREMAP_END; } - -struct seq_file; -void arch_report_meminfo(struct seq_file *m); #endif /* CONFIG_PPC64 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 038ce8d9061d..8920862ffd79 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -144,7 +144,7 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask) /* We support DMA to/from any memory page via the iommu */ int dma_iommu_dma_supported(struct device *dev, u64 mask) { - struct iommu_table *tbl = get_iommu_table_base(dev); + struct iommu_table *tbl; if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) { /* @@ -162,6 +162,8 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) return 1; } + tbl = get_iommu_table_base(dev); + if (!tbl) { dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask); return 0; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0089dd49b4cb..67f0b01e6ff5 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -518,7 +518,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; dma_addr = entry << tbl->it_page_shift; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl)); + dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl)); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); @@ -905,6 +905,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; + int tcesize = (1 << tbl->it_page_shift); size = PAGE_ALIGN(size); order = get_order(size); @@ -931,7 +932,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> tbl->it_page_shift; + nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); @@ -939,7 +941,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, free_pages((unsigned long)ret, order); return NULL; } - *dma_handle = mapping; + + *dma_handle = mapping | ((u64)ret & (tcesize - 1)); return ret; } @@ -950,7 +953,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> tbl->it_page_shift; + nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -1168,23 +1171,6 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) } EXPORT_SYMBOL_GPL(iommu_add_device); -void iommu_del_device(struct device *dev) -{ - /* - * Some devices might not have IOMMU table and group - * and we needn't detach them from the associated - * IOMMU groups - */ - if (!device_iommu_mapped(dev)) { - pr_debug("iommu_tce: skipping device %s with no tbl\n", - dev_name(dev)); - return; - } - - iommu_group_remove_device(dev); -} -EXPORT_SYMBOL_GPL(iommu_del_device); - /* * A simple iommu_table_group_ops which only allows reusing the existing * iommu_table. This handles VFIO for POWER7 or the nested KVM. diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index 85bdd7d3652f..48e0eaf1ad61 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -93,11 +93,12 @@ static int process_ISA_OF_ranges(struct device_node *isa_node, } inval_range: - if (!phb_io_base_phys) { + if (phb_io_base_phys) { pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n"); remap_isa_base(phb_io_base_phys, 0x10000); + return 0; } - return 0; + return -EINVAL; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 265801a3e94c..e95660e69414 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -417,9 +417,9 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags) { raw_local_irq_save(*flags); hard_irq_disable(); - while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { + while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { raw_local_irq_restore(*flags); - spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0); + spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0); raw_local_irq_save(*flags); hard_irq_disable(); } @@ -427,15 +427,15 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags) noinstr static void nmi_ipi_lock(void) { - while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) - spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0); + while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) + spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0); } noinstr static void nmi_ipi_unlock(void) { smp_mb(); - WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1); - arch_atomic_set(&__nmi_ipi_lock, 0); + WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1); + raw_atomic_set(&__nmi_ipi_lock, 0); } noinstr static void nmi_ipi_unlock_end(unsigned long *flags) @@ -1605,6 +1605,7 @@ static void add_cpu_to_masks(int cpu) } /* Activate a secondary processor. */ +__no_stack_protector void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 828d0f4106d2..cba6dd15de3b 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -200,7 +200,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); + tau_workq = alloc_ordered_workqueue("tau", 0); if (!tau_workq) return -ENOMEM; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 26245aaf12b8..2297aa764ecd 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1040,8 +1040,8 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t entry, unsigned long address, int psize) { struct mm_struct *mm = vma->vm_mm; - unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | - _PAGE_RW | _PAGE_EXEC); + unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_SOFT_DIRTY | + _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); unsigned long change = pte_val(entry) ^ pte_val(*ptep); /* diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index ce804b7bf84e..0bd4866d9824 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -795,12 +795,20 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) goto out; if (current->active_mm == mm) { + unsigned long flags; + WARN_ON_ONCE(current->mm != NULL); - /* Is a kernel thread and is using mm as the lazy tlb */ + /* + * It is a kernel thread and is using mm as the lazy tlb, so + * switch it to init_mm. This is not always called from IPI + * (e.g., flush_type_needed), so must disable irqs. + */ + local_irq_save(flags); mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; switch_mm_irqs_off(mm, &init_mm, current); mmdrop_lazy_tlb(mm); + local_irq_restore(flags); } /* diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index e93aefcfb83f..37043dfc1add 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) bpf_hdr = jit_data->header; proglen = jit_data->proglen; extra_pass = true; + /* During extra pass, ensure index is reset before repopulating extable entries */ + cgctx.exentry_idx = 0; goto skip_init_ctx; } diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 0d9b7609c7d5..3e2e252016f7 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -265,6 +265,7 @@ config CPM2 config FSL_ULI1575 bool "ULI1575 PCIe south bridge support" depends on FSL_SOC_BOOKE || PPC_86xx + depends on PCI select FSL_PCI select GENERIC_ISA_DMA help diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 193cc9c39422..0c41f4b005bc 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -76,7 +76,8 @@ int pmac_newworld; static int current_root_goodness = -1; -#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ +/* sda1 - slightly silly choice */ +#define DEFAULT_ROOT_DEVICE MKDEV(SCSI_DISK0_MAJOR, 1) sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; EXPORT_SYMBOL(sys_ctrler); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 233a50e65fce..7725492097b6 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -865,28 +865,3 @@ void __init pnv_pci_init(void) /* Configure IOMMU DMA hooks */ set_pci_dma_ops(&dma_iommu_ops); } - -static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - case BUS_NOTIFY_DEL_DEVICE: - iommu_del_device(dev); - return 0; - default: - return 0; - } -} - -static struct notifier_block pnv_tce_iommu_bus_nb = { - .notifier_call = pnv_tce_iommu_bus_notifier, -}; - -static int __init pnv_tce_iommu_bus_notifier_init(void) -{ - bus_register_notifier(&pci_bus_type, &pnv_tce_iommu_bus_nb); - return 0; -} -machine_subsys_initcall_sync(powernv, pnv_tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 719c97a155ed..47f8eabd1bee 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -564,8 +564,7 @@ int __init dlpar_workqueue_init(void) if (pseries_hp_wq) return 0; - pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", - WQ_UNBOUND, 1); + pseries_hp_wq = alloc_ordered_workqueue("pseries hotplug workqueue", 0); return pseries_hp_wq ? 0 : -ENOMEM; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 7464fa6e4145..d59e8a98a200 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -91,19 +91,24 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) static void iommu_pseries_free_group(struct iommu_table_group *table_group, const char *node_name) { - struct iommu_table *tbl; - if (!table_group) return; - tbl = table_group->tables[0]; #ifdef CONFIG_IOMMU_API if (table_group->group) { iommu_group_put(table_group->group); BUG_ON(table_group->group); } #endif - iommu_tce_table_put(tbl); + + /* Default DMA window table is at index 0, while DDW at 1. SR-IOV + * adapters only have table on index 1. + */ + if (table_group->tables[0]) + iommu_tce_table_put(table_group->tables[0]); + + if (table_group->tables[1]) + iommu_tce_table_put(table_group->tables[1]); kfree(table_group); } @@ -312,13 +317,22 @@ static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) { u64 rc; + long rpages = npages; + unsigned long limit; if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) return tce_free_pSeriesLP(tbl->it_index, tcenum, tbl->it_page_shift, npages); - rc = plpar_tce_stuff((u64)tbl->it_index, - (u64)tcenum << tbl->it_page_shift, 0, npages); + do { + limit = min_t(unsigned long, rpages, 512); + + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << tbl->it_page_shift, 0, limit); + + rpages -= limit; + tcenum += limit; + } while (rpages > 0 && !rc); if (rc && printk_ratelimit()) { printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); @@ -1695,31 +1709,6 @@ static int __init disable_multitce(char *str) __setup("multitce=", disable_multitce); -static int tce_iommu_bus_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - case BUS_NOTIFY_DEL_DEVICE: - iommu_del_device(dev); - return 0; - default: - return 0; - } -} - -static struct notifier_block tce_iommu_bus_nb = { - .notifier_call = tce_iommu_bus_notifier, -}; - -static int __init tce_iommu_bus_notifier_init(void) -{ - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); - return 0; -} -machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); - #ifdef CONFIG_SPAPR_TCE_IOMMU struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose, struct pci_dev *pdev) diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 6f5e2727963c..78473d69cd2b 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -5,6 +5,11 @@ KCSAN_SANITIZE := n targets += trampoline_$(BITS).o purgatory.ro +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined $(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 728d3c257e4a..70c4c59a1a8f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -88,7 +88,7 @@ static unsigned long ndump = 64; static unsigned long nidump = 16; static unsigned long ncsum = 4096; static int termch; -static char tmpstr[128]; +static char tmpstr[KSYM_NAME_LEN]; static int tracing_enabled; static long bus_error_jmp[JMP_BUF_LEN]; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 348c0fa1fc8c..c69572fbe613 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -26,6 +26,7 @@ config RISCV select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MMIOWB + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU @@ -122,6 +123,7 @@ config RISCV select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN @@ -799,8 +801,11 @@ menu "Power management options" source "kernel/power/Kconfig" +# Hibernation is only possible on systems where the SBI implementation has +# marked its reserved memory as not accessible from, or does not run +# from the same memory as, Linux config ARCH_HIBERNATION_POSSIBLE - def_bool y + def_bool NONPORTABLE config ARCH_HIBERNATION_HEADER def_bool HIBERNATION diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index a1055965fbee..7b2637c8c332 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -1,2 +1,6 @@ +ifdef CONFIG_RELOCATABLE +KBUILD_CFLAGS += -fno-pie +endif + obj-$(CONFIG_ERRATA_SIFIVE) += sifive/ obj-$(CONFIG_ERRATA_THEAD) += thead/ diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index bba472928b53..f5dfef6c2153 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -238,78 +238,6 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless #endif -/* - * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as - * {cmp,}xchg and the operations that return, so they need a full barrier. - */ -#define ATOMIC_OP(c_t, prefix, size) \ -static __always_inline \ -c_t arch_atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \ -{ \ - return __xchg_relaxed(&(v->counter), n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \ -{ \ - return __xchg_acquire(&(v->counter), n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \ -{ \ - return __xchg_release(&(v->counter), n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \ -{ \ - return __arch_xchg(&(v->counter), n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \ - c_t o, c_t n) \ -{ \ - return __cmpxchg_relaxed(&(v->counter), o, n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \ - c_t o, c_t n) \ -{ \ - return __cmpxchg_acquire(&(v->counter), o, n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \ - c_t o, c_t n) \ -{ \ - return __cmpxchg_release(&(v->counter), o, n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \ -{ \ - return __cmpxchg(&(v->counter), o, n, size); \ -} - -#ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS() \ - ATOMIC_OP(int, , 4) -#else -#define ATOMIC_OPS() \ - ATOMIC_OP(int, , 4) \ - ATOMIC_OP(s64, 64, 8) -#endif - -ATOMIC_OPS() - -#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed -#define arch_atomic_xchg_acquire arch_atomic_xchg_acquire -#define arch_atomic_xchg_release arch_atomic_xchg_release -#define arch_atomic_xchg arch_atomic_xchg -#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed -#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire -#define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release -#define arch_atomic_cmpxchg arch_atomic_cmpxchg - -#undef ATOMIC_OPS -#undef ATOMIC_OP - static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) { int prev, rc; diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index fe6f23006641..ce1ebda1a49a 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -36,6 +36,9 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); +#define __HAVE_ARCH_HUGE_PTEP_GET +pte_t huge_ptep_get(pte_t *ptep); + pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h index d887a54042aa..0bbffd528096 100644 --- a/arch/riscv/include/asm/kfence.h +++ b/arch/riscv/include/asm/kfence.h @@ -8,41 +8,8 @@ #include <asm-generic/pgalloc.h> #include <asm/pgtable.h> -static inline int split_pmd_page(unsigned long addr) -{ - int i; - unsigned long pfn = PFN_DOWN(__pa((addr & PMD_MASK))); - pmd_t *pmd = pmd_off_k(addr); - pte_t *pte = pte_alloc_one_kernel(&init_mm); - - if (!pte) - return -ENOMEM; - - for (i = 0; i < PTRS_PER_PTE; i++) - set_pte(pte + i, pfn_pte(pfn + i, PAGE_KERNEL)); - set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(pte)), PAGE_TABLE)); - - flush_tlb_kernel_range(addr, addr + PMD_SIZE); - return 0; -} - static inline bool arch_kfence_init_pool(void) { - int ret; - unsigned long addr; - pmd_t *pmd; - - for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr); - addr += PAGE_SIZE) { - pmd = pmd_off_k(addr); - - if (pmd_leaf(*pmd)) { - ret = split_pmd_page(addr); - if (ret) - return false; - } - } - return true; } diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index d42c901f9a97..665bbc9b2f84 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -10,4 +10,11 @@ #include <linux/perf_event.h> #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs + +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->epc = (__ip); \ + (regs)->s0 = (unsigned long) __builtin_frame_address(0); \ + (regs)->sp = current_stack_pointer; \ + (regs)->status = SR_PP; \ +} #endif /* _ASM_RISCV_PERF_EVENT_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 2258b27173b0..75970ee2bda2 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -165,8 +165,7 @@ extern struct pt_alloc_ops pt_ops __initdata; _PAGE_EXEC | _PAGE_WRITE) #define PAGE_COPY PAGE_READ -#define PAGE_COPY_EXEC PAGE_EXEC -#define PAGE_COPY_READ_EXEC PAGE_READ_EXEC +#define PAGE_COPY_EXEC PAGE_READ_EXEC #define PAGE_SHARED PAGE_WRITE #define PAGE_SHARED_EXEC PAGE_WRITE_EXEC diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index c4b77017ec58..0d555847cde6 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -70,7 +70,7 @@ asmlinkage void smp_callin(void); #if defined CONFIG_HOTPLUG_CPU int __cpu_disable(void); -void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } #endif /* CONFIG_HOTPLUG_CPU */ #else diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fbdccc21418a..153864e4f399 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -23,6 +23,10 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE) endif +ifdef CONFIG_RELOCATABLE +CFLAGS_alternative.o += -fno-pie +CFLAGS_cpufeature.o += -fno-pie +endif ifdef CONFIG_KASAN KASAN_SANITIZE_alternative.o := n KASAN_SANITIZE_cpufeature.o := n diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index a941adc7cbf2..457a18efcb11 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/err.h> #include <linux/irq.h> +#include <linux/cpuhotplug.h> #include <linux/cpu.h> #include <linux/sched/hotplug.h> #include <asm/irq.h> @@ -49,17 +50,15 @@ int __cpu_disable(void) return ret; } +#ifdef CONFIG_HOTPLUG_CPU /* - * Called on the thread which is asking for a CPU to be shutdown. + * Called on the thread which is asking for a CPU to be shutdown, if the + * CPU reported dead to the hotplug core. */ -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { int ret = 0; - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU %u: didn't die\n", cpu); - return; - } pr_notice("CPU%u: off\n", cpu); /* Verify from the firmware if the cpu is really stopped*/ @@ -76,9 +75,10 @@ void __noreturn arch_cpu_idle_dead(void) { idle_task_exit(); - (void)cpu_report_death(); + cpuhp_ap_report_dead(); cpu_ops[smp_processor_id()]->cpu_stop(); /* It should never reach here */ BUG(); } +#endif diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index c40139e9ca47..8265ff497977 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -4,3 +4,5 @@ obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index a163a3e0f0d4..e0ef56dc57b9 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -3,6 +3,30 @@ #include <linux/err.h> #ifdef CONFIG_RISCV_ISA_SVNAPOT +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long pte_num; + int i; + pte_t orig_pte = ptep_get(ptep); + + if (!pte_present(orig_pte) || !pte_napot(orig_pte)) + return orig_pte; + + pte_num = napot_pte_num(napot_cont_order(orig_pte)); + + for (i = 0; i < pte_num; i++, ptep++) { + pte_t pte = ptep_get(ptep); + + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); + } + + return orig_pte; +} + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, @@ -218,6 +242,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, { pte_t pte = ptep_get(ptep); unsigned long order; + pte_t orig_pte; int i, pte_num; if (!pte_napot(pte)) { @@ -228,9 +253,12 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, order = napot_cont_order(pte); pte_num = napot_pte_num(order); ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); + + orig_pte = pte_wrprotect(orig_pte); for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) - ptep_set_wrprotect(mm, addr, ptep); + set_pte_at(mm, addr, ptep, orig_pte); } pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 747e5b1ef02d..4fa420faa780 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -23,6 +23,7 @@ #ifdef CONFIG_RELOCATABLE #include <linux/elf.h> #endif +#include <linux/kfence.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> @@ -293,7 +294,7 @@ static const pgprot_t protection_map[16] = { [VM_EXEC] = PAGE_EXEC, [VM_EXEC | VM_READ] = PAGE_READ_EXEC, [VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_READ_EXEC, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_EXEC, [VM_SHARED] = PAGE_NONE, [VM_SHARED | VM_READ] = PAGE_READ, [VM_SHARED | VM_WRITE] = PAGE_SHARED, @@ -659,18 +660,19 @@ void __init create_pgd_mapping(pgd_t *pgdp, create_pgd_next_mapping(nextp, va, pa, sz, prot); } -static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) +static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va, + phys_addr_t size) { - if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) + if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) return PGDIR_SIZE; - if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE) + if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) return P4D_SIZE; - if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE) + if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) return PUD_SIZE; - if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE) + if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) return PMD_SIZE; return PAGE_SIZE; @@ -922,9 +924,9 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va, uintptr_t dtb_pa) { +#ifndef CONFIG_BUILTIN_DTB uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); -#ifndef CONFIG_BUILTIN_DTB /* Make sure the fdt fixmap address is always aligned on PMD size */ BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE)); @@ -1167,14 +1169,16 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) } static void __init create_linear_mapping_range(phys_addr_t start, - phys_addr_t end) + phys_addr_t end, + uintptr_t fixed_map_size) { phys_addr_t pa; uintptr_t va, map_size; for (pa = start; pa < end; pa += map_size) { va = (uintptr_t)__va(pa); - map_size = best_map_size(pa, end - pa); + map_size = fixed_map_size ? fixed_map_size : + best_map_size(pa, va, end - pa); create_pgd_mapping(swapper_pg_dir, va, pa, map_size, pgprot_from_va(va)); @@ -1184,6 +1188,7 @@ static void __init create_linear_mapping_range(phys_addr_t start, static void __init create_linear_mapping_page_table(void) { phys_addr_t start, end; + phys_addr_t kfence_pool __maybe_unused; u64 i; #ifdef CONFIG_STRICT_KERNEL_RWX @@ -1197,6 +1202,19 @@ static void __init create_linear_mapping_page_table(void) memblock_mark_nomap(krodata_start, krodata_size); #endif +#ifdef CONFIG_KFENCE + /* + * kfence pool must be backed by PAGE_SIZE mappings, so allocate it + * before we setup the linear mapping so that we avoid using hugepages + * for this region. + */ + kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); + BUG_ON(!kfence_pool); + + memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); + __kfence_pool = __va(kfence_pool); +#endif + /* Map all memory banks in the linear mapping */ for_each_mem_range(i, &start, &end) { if (start >= end) @@ -1207,17 +1225,25 @@ static void __init create_linear_mapping_page_table(void) if (end >= __pa(PAGE_OFFSET) + memory_limit) end = __pa(PAGE_OFFSET) + memory_limit; - create_linear_mapping_range(start, end); + create_linear_mapping_range(start, end, 0); } #ifdef CONFIG_STRICT_KERNEL_RWX - create_linear_mapping_range(ktext_start, ktext_start + ktext_size); + create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0); create_linear_mapping_range(krodata_start, - krodata_start + krodata_size); + krodata_start + krodata_size, 0); memblock_clear_nomap(ktext_start, ktext_size); memblock_clear_nomap(krodata_start, krodata_size); #endif + +#ifdef CONFIG_KFENCE + create_linear_mapping_range(kfence_pool, + kfence_pool + KFENCE_POOL_SIZE, + PAGE_SIZE); + + memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); +#endif } static void __init setup_vm_final(void) diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index 8c73360c42bb..dc20e166983e 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -35,6 +35,11 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY CFLAGS_string.o := -D__DISABLE_EXPORTS CFLAGS_ctype.o := -D__DISABLE_EXPORTS +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index db20c1589a98..5b39918b7042 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -117,6 +117,7 @@ config S390 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_HUGETLBFS + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP @@ -469,19 +470,11 @@ config SCHED_SMT config SCHED_MC def_bool n -config SCHED_BOOK - def_bool n - -config SCHED_DRAWER - def_bool n - config SCHED_TOPOLOGY def_bool y prompt "Topology scheduler support" select SCHED_SMT select SCHED_MC - select SCHED_BOOK - select SCHED_DRAWER help Topology scheduler support improves the CPU scheduler's decision making when dealing with machines that have multi-threading, @@ -716,7 +709,6 @@ config EADM_SCH config VFIO_CCW def_tristate n prompt "Support for VFIO-CCW subchannels" - depends on S390_CCW_IOMMU depends on VFIO select VFIO_MDEV help @@ -728,7 +720,7 @@ config VFIO_CCW config VFIO_AP def_tristate n prompt "VFIO support for AP devices" - depends on S390_AP_IOMMU && KVM + depends on KVM depends on VFIO depends on ZCRYPT select VFIO_MDEV diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index acb1f8b53105..c67f59db7a51 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -45,6 +45,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat static pte_t pte_z; +static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) +{ + start = PAGE_ALIGN_DOWN(__sha(start)); + end = PAGE_ALIGN(__sha(end)); + pgtable_populate(start, end, mode); +} + static void kasan_populate_shadow(void) { pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); @@ -95,17 +102,17 @@ static void kasan_populate_shadow(void) */ for_each_physmem_usable_range(i, &start, &end) - pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW); + kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_end = VMALLOC_START; /* shallowly populate kasan shadow for vmalloc and modules */ - pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW); + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); } else { untracked_end = MODULES_VADDR; } /* populate kasan shadow for untracked memory */ - pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW); - pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); } static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 4ccf66d29fc2..aa95cf6dfabb 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -116,6 +116,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_TC_SKB_EXT=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -591,8 +592,6 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -CONFIG_S390_CCW_IOMMU=y -CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -703,6 +702,7 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" +CONFIG_INIT_STACK_NONE=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 693297a2e897..f041945f9148 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -107,6 +107,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_TC_SKB_EXT=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -580,8 +581,6 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m -CONFIG_S390_CCW_IOMMU=y -CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -686,6 +685,7 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" +CONFIG_INIT_STACK_NONE=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 33a232bb68af..6f68b39817ef 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -67,6 +67,7 @@ CONFIG_ZFCP=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" +CONFIG_INIT_STACK_NONE=y # CONFIG_ZLIB_DFLTCC is not set CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c index 7752bd314558..5fae187f947a 100644 --- a/arch/s390/crypto/chacha-glue.c +++ b/arch/s390/crypto/chacha-glue.c @@ -82,7 +82,7 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, * it cannot handle a block of data or less, but otherwise * it can handle data of arbitrary size */ - if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20) + if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !MACHINE_HAS_VX) chacha_crypt_generic(state, dst, src, bytes, nrounds); else chacha20_crypt_s390(state, dst, src, bytes, diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 29dc827e0fe8..d29a9d908797 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -5,7 +5,7 @@ * s390 implementation of the AES Cipher Algorithm with protected keys. * * s390 Version: - * Copyright IBM Corp. 2017,2020 + * Copyright IBM Corp. 2017, 2023 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Harald Freudenberger <freude@de.ibm.com> */ @@ -132,7 +132,8 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb, if (i > 0 && ret == -EAGAIN && in_task()) if (msleep_interruptible(1000)) return -EINTR; - ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk); + ret = pkey_keyblob2pkey(kb->key, kb->keylen, + pk->protkey, &pk->len, &pk->type); if (ret == 0) break; } @@ -145,6 +146,7 @@ static inline int __paes_convert_key(struct s390_paes_ctx *ctx) int ret; struct pkey_protkey pkey; + pkey.len = sizeof(pkey.protkey); ret = __paes_keyblob2pkey(&ctx->kb, &pkey); if (ret) return ret; @@ -414,6 +416,9 @@ static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) { struct pkey_protkey pkey0, pkey1; + pkey0.len = sizeof(pkey0.protkey); + pkey1.len = sizeof(pkey1.protkey); + if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) || __paes_keyblob2pkey(&ctx->kb[1], &pkey1)) return -EINVAL; diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h index c37eb921bfbf..a873e873e1ee 100644 --- a/arch/s390/include/asm/asm-prototypes.h +++ b/arch/s390/include/asm/asm-prototypes.h @@ -6,4 +6,8 @@ #include <asm/fpu/api.h> #include <asm-generic/asm-prototypes.h> +__int128_t __ashlti3(__int128_t a, int b); +__int128_t __ashrti3(__int128_t a, int b); +__int128_t __lshrti3(__int128_t a, int b); + #endif /* _ASM_S390_PROTOTYPES_H */ diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 06e0e42f4eec..aae0315374de 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -190,38 +190,18 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, #define arch_cmpxchg_local arch_cmpxchg #define arch_cmpxchg64_local arch_cmpxchg -#define system_has_cmpxchg_double() 1 +#define system_has_cmpxchg128() 1 -static __always_inline int __cmpxchg_double(unsigned long p1, unsigned long p2, - unsigned long o1, unsigned long o2, - unsigned long n1, unsigned long n2) +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new) { - union register_pair old = { .even = o1, .odd = o2, }; - union register_pair new = { .even = n1, .odd = n2, }; - int cc; - asm volatile( " cdsg %[old],%[new],%[ptr]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=&d" (cc), [old] "+&d" (old.pair) - : [new] "d" (new.pair), - [ptr] "QS" (*(unsigned long *)p1), "Q" (*(unsigned long *)p2) + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) : "memory", "cc"); - return !cc; + return old; } -#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ -({ \ - typeof(p1) __p1 = (p1); \ - typeof(p2) __p2 = (p2); \ - \ - BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ - BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ - VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\ - __cmpxchg_double((unsigned long)__p1, (unsigned long)__p2, \ - (unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2)); \ -}) +#define arch_cmpxchg128 arch_cmpxchg128 #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index a386070f1d56..3cb9d813f022 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -112,7 +112,7 @@ struct compat_statfs64 { u32 f_namelen; u32 f_frsize; u32 f_flags; - u32 f_spare[4]; + u32 f_spare[5]; }; /* diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 646b12981f20..b378e2b57ad8 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -2,7 +2,7 @@ /* * CP Assist for Cryptographic Functions (CPACF) * - * Copyright IBM Corp. 2003, 2017 + * Copyright IBM Corp. 2003, 2023 * Author(s): Thomas Spatzier * Jan Glauber * Harald Freudenberger (freude@de.ibm.com) @@ -132,6 +132,11 @@ #define CPACF_PCKMO_ENC_AES_128_KEY 0x12 #define CPACF_PCKMO_ENC_AES_192_KEY 0x13 #define CPACF_PCKMO_ENC_AES_256_KEY 0x14 +#define CPACF_PCKMO_ENC_ECC_P256_KEY 0x20 +#define CPACF_PCKMO_ENC_ECC_P384_KEY 0x21 +#define CPACF_PCKMO_ENC_ECC_P521_KEY 0x22 +#define CPACF_PCKMO_ENC_ECC_ED25519_KEY 0x28 +#define CPACF_PCKMO_ENC_ECC_ED448_KEY 0x29 /* * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 7e417d7de568..a0de5b9b02ea 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -140,7 +140,7 @@ union hws_trailer_header { unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ unsigned long long overflow; /* 64 - Overflow Count */ }; - __uint128_t val; + u128 val; }; struct hws_trailer_entry { diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 0d1c74a7a650..a4d2e103f116 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -16,6 +16,9 @@ #define OS_INFO_VMCOREINFO 0 #define OS_INFO_REIPL_BLOCK 1 +#define OS_INFO_FLAGS_ENTRY 2 + +#define OS_INFO_FLAG_REIPL_CLEAR (1UL << 0) struct os_info_entry { u64 addr; @@ -30,8 +33,8 @@ struct os_info { u16 version_minor; u64 crashkernel_addr; u64 crashkernel_size; - struct os_info_entry entry[2]; - u8 reserved[4024]; + struct os_info_entry entry[3]; + u8 reserved[4004]; } __packed; void os_info_init(void); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 081837b391e3..264095dd84bc 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -148,6 +148,22 @@ #define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n) + +#define this_cpu_cmpxchg128(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + u128 old__, new__, ret__; \ + pcp_op_T__ *ptr__; \ + old__ = oval; \ + new__ = nval; \ + preempt_disable_notrace(); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ + ret__ = cmpxchg128((void *)ptr__, old__, new__); \ + preempt_enable_notrace(); \ + ret__; \ +}) + #define arch_this_cpu_xchg(pcp, nval) \ ({ \ typeof(pcp) *ptr__; \ @@ -164,24 +180,6 @@ #define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval) #define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval) -#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \ -({ \ - typeof(pcp1) *p1__; \ - typeof(pcp2) *p2__; \ - int ret__; \ - \ - preempt_disable_notrace(); \ - p1__ = raw_cpu_ptr(&(pcp1)); \ - p2__ = raw_cpu_ptr(&(pcp2)); \ - ret__ = __cmpxchg_double((unsigned long)p1__, (unsigned long)p2__, \ - (unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2)); \ - preempt_enable_notrace(); \ - ret__; \ -}) - -#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double - #include <asm-generic/percpu.h> #endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6822a11c2c8a..c55f3c3365af 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count) atomic_long_add(count, &direct_pages_count[level]); } -struct seq_file; -void arch_report_meminfo(struct seq_file *m); - /* * The S390 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 8e9c582592b3..9e41a74fce9a 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -3,6 +3,7 @@ #define _ASM_S390_MEM_DETECT_H #include <linux/types.h> +#include <asm/page.h> enum physmem_info_source { MEM_DETECT_NONE = 0, @@ -133,7 +134,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) #define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \ for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \ - range && range->end; range = range->chain, \ + range && range->end; range = range->chain ? __va(range->chain) : NULL, \ *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t, @@ -145,7 +146,7 @@ static inline struct reserved_range *__physmem_reserved_next(enum reserved_range return range; } if (range->chain) - return range->chain; + return __va(range->chain); while (++*t < RR_MAX) { range = &physmem_info.reserved[*t]; if (range->end) diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index dd3d20c332ac..47d80a7451a6 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -2,7 +2,7 @@ /* * Kernelspace interface to the pkey device driver * - * Copyright IBM Corp. 2016,2019 + * Copyright IBM Corp. 2016, 2023 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -23,6 +23,6 @@ * @return 0 on success, negative errno value on failure */ int pkey_keyblob2pkey(const u8 *key, u32 keylen, - struct pkey_protkey *protkey); + u8 *protkey, u32 *protkeylen, u32 *protkeytype); #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index ce878e85b6e4..4d646659a5f5 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -63,7 +63,7 @@ static inline int store_tod_clock_ext_cc(union tod_clock *clk) return cc; } -static inline void store_tod_clock_ext(union tod_clock *tod) +static __always_inline void store_tod_clock_ext(union tod_clock *tod) { asm volatile("stcke %0" : "=Q" (*tod) : : "cc"); } @@ -177,7 +177,7 @@ static inline void local_tick_enable(unsigned long comp) typedef unsigned long cycles_t; -static inline unsigned long get_tod_clock(void) +static __always_inline unsigned long get_tod_clock(void) { union tod_clock clk; @@ -204,6 +204,11 @@ void init_cpu_timer(void); extern union tod_clock tod_clock_base; +static __always_inline unsigned long __get_tod_clock_monotonic(void) +{ + return get_tod_clock() - tod_clock_base.tod; +} + /** * get_clock_monotonic - returns current time in clock rate units * @@ -216,7 +221,7 @@ static inline unsigned long get_tod_clock_monotonic(void) unsigned long tod; preempt_disable_notrace(); - tod = get_tod_clock() - tod_clock_base.tod; + tod = __get_tod_clock_monotonic(); preempt_enable_notrace(); return tod; } @@ -240,7 +245,7 @@ static inline unsigned long get_tod_clock_monotonic(void) * -> ns = (th * 125) + ((tl * 125) >> 9); * */ -static inline unsigned long tod_to_ns(unsigned long todval) +static __always_inline unsigned long tod_to_ns(unsigned long todval) { return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index 924b876f992c..f7bae1c63bd6 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -2,7 +2,7 @@ /* * Userspace interface to the pkey device driver * - * Copyright IBM Corp. 2017, 2019 + * Copyright IBM Corp. 2017, 2023 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -32,10 +32,15 @@ #define MINKEYBLOBSIZE SECKEYBLOBSIZE /* defines for the type field within the pkey_protkey struct */ -#define PKEY_KEYTYPE_AES_128 1 -#define PKEY_KEYTYPE_AES_192 2 -#define PKEY_KEYTYPE_AES_256 3 -#define PKEY_KEYTYPE_ECC 4 +#define PKEY_KEYTYPE_AES_128 1 +#define PKEY_KEYTYPE_AES_192 2 +#define PKEY_KEYTYPE_AES_256 3 +#define PKEY_KEYTYPE_ECC 4 +#define PKEY_KEYTYPE_ECC_P256 5 +#define PKEY_KEYTYPE_ECC_P384 6 +#define PKEY_KEYTYPE_ECC_P521 7 +#define PKEY_KEYTYPE_ECC_ED25519 8 +#define PKEY_KEYTYPE_ECC_ED448 9 /* the newer ioctls use a pkey_key_type enum for type information */ enum pkey_key_type { diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h index 72604f7792c3..f85b50723dd3 100644 --- a/arch/s390/include/uapi/asm/statfs.h +++ b/arch/s390/include/uapi/asm/statfs.h @@ -30,7 +30,7 @@ struct statfs { unsigned int f_namelen; unsigned int f_frsize; unsigned int f_flags; - unsigned int f_spare[4]; + unsigned int f_spare[5]; }; struct statfs64 { @@ -45,7 +45,7 @@ struct statfs64 { unsigned int f_namelen; unsigned int f_frsize; unsigned int f_flags; - unsigned int f_spare[4]; + unsigned int f_spare[5]; }; #endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 8983837b3565..6b2a051e1f8a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -10,6 +10,7 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) # Do not trace early setup code CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) endif diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 8a617be28bb4..7af69948b290 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -568,9 +568,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt) int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) { Elf64_Phdr *phdr_notes, *phdr_loads; + size_t alloc_size; int mem_chunk_cnt; void *ptr, *hdr; - u32 alloc_size; u64 hdr_off; /* If we are not in kdump or zfcp/nvme dump mode return */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 43de939b7af1..85a00d97a314 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -176,6 +176,8 @@ static bool reipl_fcp_clear; static bool reipl_ccw_clear; static bool reipl_eckd_clear; +static unsigned long os_info_flags; + static inline int __diag308(unsigned long subcode, unsigned long addr) { union register_pair r1; @@ -1935,14 +1937,27 @@ static struct shutdown_action __refdata dump_action = { static void dump_reipl_run(struct shutdown_trigger *trigger) { - unsigned long ipib = (unsigned long) reipl_block_actual; struct lowcore *abs_lc; unsigned int csum; + /* + * Set REIPL_CLEAR flag in os_info flags entry indicating + * 'clear' sysfs attribute has been set on the panicked system + * for specified reipl type. + * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN. + */ + if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) || + (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) || + (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) || + (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) || + reipl_type == IPL_TYPE_NSS || + reipl_type == IPL_TYPE_UNKNOWN) + os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR; + os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags)); csum = (__force unsigned int) csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); abs_lc = get_abs_lowcore(); - abs_lc->ipib = ipib; + abs_lc->ipib = __pa(reipl_block_actual); abs_lc->ipib_checksum = csum; put_abs_lowcore(abs_lc); dump_run(trigger); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index f1b35dcdf3eb..42215f9404af 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -352,7 +352,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, rc = apply_rela_bits(loc, val, 0, 64, 0, write); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { - val += (Elf_Addr) me->mem[MOD_TEXT].base - loc; + val += (Elf_Addr)me->mem[MOD_TEXT].base + + me->arch.got_offset - loc; rc = apply_rela_bits(loc, val, 1, 32, 1, write); } break; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index cf1b6e8a708d..90679143534b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -76,6 +76,7 @@ static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) } struct cpu_cf_events { + refcount_t refcnt; /* Reference count */ atomic_t ctr_set[CPUMF_CTR_SET_MAX]; u64 state; /* For perf_event_open SVC */ u64 dev_state; /* For /dev/hwctr */ @@ -88,9 +89,6 @@ struct cpu_cf_events { unsigned int sets; /* # Counter set saved in memory */ }; -/* Per-CPU event structure for the counter facility */ -static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events); - static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ static debug_info_t *cf_dbg; @@ -103,6 +101,221 @@ static debug_info_t *cf_dbg; */ static struct cpumf_ctr_info cpumf_ctr_info; +struct cpu_cf_ptr { + struct cpu_cf_events *cpucf; +}; + +static struct cpu_cf_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct cpu_cf_ptr __percpu *cfptr; +} cpu_cf_root; + +/* + * Serialize event initialization and event removal. Both are called from + * user space in task context with perf_event_open() and close() + * system calls. + * + * This mutex serializes functions cpum_cf_alloc_cpu() called at event + * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu() + * called at event removal via call back function hw_perf_event_destroy() + * when the event is deleted. They are serialized to enforce correct + * bookkeeping of pointer and reference counts anchored by + * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the + * per CPU pointers stored in cpu_cf_root::cfptr. + */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Get pointer to per-cpu structure. + * + * Function get_cpu_cfhw() is called from + * - cfset_copy_all(): This function is protected by cpus_read_lock(), so + * CPU hot plug remove can not happen. Event removal requires a close() + * first. + * + * Function this_cpu_cfhw() is called from perf common code functions: + * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}(): + * All functions execute with interrupts disabled on that particular CPU. + * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all(). + * + * Therefore it is safe to access the CPU specific pointer to the event. + */ +static struct cpu_cf_events *get_cpu_cfhw(int cpu) +{ + struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr; + + if (p) { + struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu); + + return q->cpucf; + } + return NULL; +} + +static struct cpu_cf_events *this_cpu_cfhw(void) +{ + return get_cpu_cfhw(smp_processor_id()); +} + +/* Disable counter sets on dedicated CPU */ +static void cpum_cf_reset_cpu(void *flags) +{ + lcctl(0); +} + +/* Free per CPU data when the last event is removed. */ +static void cpum_cf_free_root(void) +{ + if (!refcount_dec_and_test(&cpu_cf_root.refcnt)) + return; + free_percpu(cpu_cf_root.cfptr); + cpu_cf_root.cfptr = NULL; + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + debug_sprintf_event(cf_dbg, 4, "%s2 root.refcnt %u cfptr %px\n", + __func__, refcount_read(&cpu_cf_root.refcnt), + cpu_cf_root.cfptr); +} + +/* + * On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int cpum_cf_alloc_root(void) +{ + int rc = 0; + + if (refcount_inc_not_zero(&cpu_cf_root.refcnt)) + return rc; + + /* The memory is already zeroed. */ + cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr); + if (cpu_cf_root.cfptr) { + refcount_set(&cpu_cf_root.refcnt, 1); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + } else { + rc = -ENOMEM; + } + + return rc; +} + +/* Free CPU counter data structure for a PMU */ +static void cpum_cf_free_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + + mutex_lock(&pmc_reserve_mutex); + /* + * When invoked via CPU hotplug handler, there might be no events + * installed or that particular CPU might not have an + * event installed. This anchor pointer can be NULL! + */ + if (!cpu_cf_root.cfptr) + goto out; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + /* + * Might be zero when called from CPU hotplug handler and no event + * installed on that CPU, but on different CPUs. + */ + if (!cpuhw) + goto out; + + if (refcount_dec_and_test(&cpuhw->refcnt)) { + kfree(cpuhw); + p->cpucf = NULL; + } + cpum_cf_free_root(); +out: + mutex_unlock(&pmc_reserve_mutex); +} + +/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */ +static int cpum_cf_alloc_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + int rc; + + mutex_lock(&pmc_reserve_mutex); + rc = cpum_cf_alloc_root(); + if (rc) + goto unlock; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + + if (!cpuhw) { + cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL); + if (cpuhw) { + p->cpucf = cpuhw; + refcount_set(&cpuhw->refcnt, 1); + } else { + rc = -ENOMEM; + } + } else { + refcount_inc(&cpuhw->refcnt); + } + if (rc) { + /* + * Error in allocation of event, decrement anchor. Since + * cpu_cf_event in not created, its destroy() function is not + * invoked. Adjust the reference counter for the anchor. + */ + cpum_cf_free_root(); + } +unlock: + mutex_unlock(&pmc_reserve_mutex); + return rc; +} + +/* + * Create/delete per CPU data structures for /dev/hwctr interface and events + * created by perf_event_open(). + * If cpu is -1, track task on all available CPUs. This requires + * allocation of hardware data structures for all CPUs. This setup handles + * perf_event_open() with task context and /dev/hwctr interface. + * If cpu is non-zero install event on this CPU only. This setup handles + * perf_event_open() with CPU context. + */ +static int cpum_cf_alloc(int cpu) +{ + cpumask_var_t mask; + int rc; + + if (cpu == -1) { + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + for_each_online_cpu(cpu) { + rc = cpum_cf_alloc_cpu(cpu); + if (rc) { + for_each_cpu(cpu, mask) + cpum_cf_free_cpu(cpu); + break; + } + cpumask_set_cpu(cpu, mask); + } + free_cpumask_var(mask); + } else { + rc = cpum_cf_alloc_cpu(cpu); + } + return rc; +} + +static void cpum_cf_free(int cpu) +{ + if (cpu == -1) { + for_each_online_cpu(cpu) + cpum_cf_free_cpu(cpu); + } else { + cpum_cf_free_cpu(cpu); + } +} + #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ /* interval in seconds */ @@ -451,10 +664,10 @@ static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set) */ static void cpumf_pmu_enable(struct pmu *pmu) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int err; - if (cpuhw->flags & PMU_F_ENABLED) + if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED)) return; err = lcctl(cpuhw->state | cpuhw->dev_state); @@ -471,11 +684,11 @@ static void cpumf_pmu_enable(struct pmu *pmu) */ static void cpumf_pmu_disable(struct pmu *pmu) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - int err; + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); u64 inactive; + int err; - if (!(cpuhw->flags & PMU_F_ENABLED)) + if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED)) return; inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); @@ -487,58 +700,10 @@ static void cpumf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } -#define PMC_INIT 0UL -#define PMC_RELEASE 1UL - -static void cpum_cf_setup_cpu(void *flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - - switch ((unsigned long)flags) { - case PMC_INIT: - cpuhw->flags |= PMU_F_RESERVED; - break; - - case PMC_RELEASE: - cpuhw->flags &= ~PMU_F_RESERVED; - break; - } - - /* Disable CPU counter sets */ - lcctl(0); - debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n", - __func__, *(int *)flags, cpuhw->flags, - cpuhw->state); -} - -/* Initialize the CPU-measurement counter facility */ -static int __kernel_cpumcf_begin(void) -{ - on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1); - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} - -/* Release the CPU-measurement counter facility */ -static void __kernel_cpumcf_end(void) -{ - on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1); - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); -} - -/* Number of perf events counting hardware events */ -static atomic_t num_events = ATOMIC_INIT(0); -/* Used to avoid races in calling reserve/release_cpumf_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); - /* Release the PMU if event is the last perf event */ static void hw_perf_event_destroy(struct perf_event *event) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_events) == 0) - __kernel_cpumcf_end(); - mutex_unlock(&pmc_reserve_mutex); + cpum_cf_free(event->cpu); } /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ @@ -562,14 +727,6 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static void cpumf_hw_inuse(void) -{ - mutex_lock(&pmc_reserve_mutex); - if (atomic_inc_return(&num_events) == 1) - __kernel_cpumcf_begin(); - mutex_unlock(&pmc_reserve_mutex); -} - static int is_userspace_event(u64 ev) { return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || @@ -653,7 +810,8 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) } /* Initialize for using the CPU-measurement counter facility */ - cpumf_hw_inuse(); + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; event->destroy = hw_perf_event_destroy; /* @@ -756,7 +914,7 @@ static void cpumf_pmu_read(struct perf_event *event) static void cpumf_pmu_start(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct hw_perf_event *hwc = &event->hw; int i; @@ -830,7 +988,7 @@ static int cfdiag_push_sample(struct perf_event *event, static void cpumf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct hw_perf_event *hwc = &event->hw; int i; @@ -857,8 +1015,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) false); if (cfdiag_diffctr(cpuhw, event->hw.config_base)) cfdiag_push_sample(event, cpuhw); - } else if (cpuhw->flags & PMU_F_RESERVED) { - /* Only update when PMU not hotplugged off */ + } else { hw_perf_event_update(event); } hwc->state |= PERF_HES_UPTODATE; @@ -867,7 +1024,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) static int cpumf_pmu_add(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); ctr_set_enable(&cpuhw->state, event->hw.config_base); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; @@ -880,7 +1037,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) static void cpumf_pmu_del(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int i; cpumf_pmu_stop(event, PERF_EF_UPDATE); @@ -912,29 +1069,83 @@ static struct pmu cpumf_pmu = { .read = cpumf_pmu_read, }; -static int cpum_cf_setup(unsigned int cpu, unsigned long flags) -{ - local_irq_disable(); - cpum_cf_setup_cpu((void *)flags); - local_irq_enable(); - return 0; -} +static struct cfset_session { /* CPUs and counter set bit mask */ + struct list_head head; /* Head of list of active processes */ +} cfset_session = { + .head = LIST_HEAD_INIT(cfset_session.head) +}; + +static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */ +/* + * Synchronize access to device /dev/hwc. This mutex protects against + * concurrent access to functions cfset_open() and cfset_release(). + * Same for CPU hotplug add and remove events triggering + * cpum_cf_online_cpu() and cpum_cf_offline_cpu(). + * It also serializes concurrent device ioctl access from multiple + * processes accessing /dev/hwc. + * + * The mutex protects concurrent access to the /dev/hwctr session management + * struct cfset_session and reference counting variable cfset_opencnt. + */ +static DEFINE_MUTEX(cfset_ctrset_mutex); +/* + * CPU hotplug handles only /dev/hwctr device. + * For perf_event_open() the CPU hotplug handling is done on kernel common + * code: + * - CPU add: Nothing is done since a file descriptor can not be created + * and returned to the user. + * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and + * pmu_delete(). The event itself is removed when the file descriptor is + * closed. + */ static int cfset_online_cpu(unsigned int cpu); + static int cpum_cf_online_cpu(unsigned int cpu) { - debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__, - cpu, in_interrupt()); - cpum_cf_setup(cpu, PMC_INIT); - return cfset_online_cpu(cpu); + int rc = 0; + + debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d " + "opencnt %d\n", __func__, cpu, + refcount_read(&cpu_cf_root.refcnt), + refcount_read(&cfset_opencnt)); + /* + * Ignore notification for perf_event_open(). + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + rc = cpum_cf_alloc_cpu(cpu); + if (!rc) + cfset_online_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return rc; } static int cfset_offline_cpu(unsigned int cpu); + static int cpum_cf_offline_cpu(unsigned int cpu) { - debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu); - cfset_offline_cpu(cpu); - return cpum_cf_setup(cpu, PMC_RELEASE); + debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d opencnt %d\n", + __func__, cpu, refcount_read(&cpu_cf_root.refcnt), + refcount_read(&cfset_opencnt)); + /* + * During task exit processing of grouped perf events triggered by CPU + * hotplug processing, pmu_disable() is called as part of perf context + * removal process. Therefore do not trigger event removal now for + * perf_event_open() created events. Perf common code triggers event + * destruction when the event file descriptor is closed. + * + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + cfset_offline_cpu(cpu); + cpum_cf_free_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return 0; } /* Return true if store counter set multiple instruction is available */ @@ -953,13 +1164,13 @@ static void cpumf_measurement_alert(struct ext_code ext_code, return; inc_irq_stat(IRQEXT_CMC); - cpuhw = this_cpu_ptr(&cpu_cf_events); /* * Measurement alerts are shared and might happen when the PMU * is not reserved. Ignore these alerts in this case. */ - if (!(cpuhw->flags & PMU_F_RESERVED)) + cpuhw = this_cpu_cfhw(); + if (!cpuhw) return; /* counter authorization change alert */ @@ -1039,19 +1250,11 @@ out1: * counter set via normal file operations. */ -static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */ -static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */ struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ unsigned int sets; /* Counter set bit mask */ atomic_t cpus_ack; /* # CPUs successfully executed func */ }; -static struct cfset_session { /* CPUs and counter set bit mask */ - struct list_head head; /* Head of list of active processes */ -} cfset_session = { - .head = LIST_HEAD_INIT(cfset_session.head) -}; - struct cfset_request { /* CPUs and counter set bit mask */ unsigned long ctrset; /* Bit mask of counter set to read */ cpumask_t mask; /* CPU mask to read from */ @@ -1113,11 +1316,11 @@ static void cfset_session_add(struct cfset_request *p) /* Stop all counter sets via ioctl interface */ static void cfset_ioctl_off(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int rc; - /* Check if any counter set used by /dev/hwc */ + /* Check if any counter set used by /dev/hwctr */ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) if ((p->sets & cpumf_ctr_ctl[rc])) { if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { @@ -1141,7 +1344,7 @@ static void cfset_ioctl_off(void *parm) /* Start counter sets on particular CPU */ static void cfset_ioctl_on(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int rc; @@ -1163,7 +1366,7 @@ static void cfset_ioctl_on(void *parm) static void cfset_release_cpu(void *p) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int rc; debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", @@ -1203,27 +1406,41 @@ static int cfset_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; } - if (!atomic_dec_return(&cfset_opencnt)) + if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */ on_each_cpu(cfset_release_cpu, NULL, 1); + cpum_cf_free(-1); + } mutex_unlock(&cfset_ctrset_mutex); - - hw_perf_event_destroy(NULL); return 0; } +/* + * Open via /dev/hwctr device. Allocate all per CPU resources on the first + * open of the device. The last close releases all per CPU resources. + * Parallel perf_event_open system calls also use per CPU resources. + * These invocations are handled via reference counting on the per CPU data + * structures. + */ static int cfset_open(struct inode *inode, struct file *file) { - if (!capable(CAP_SYS_ADMIN)) + int rc = 0; + + if (!perfmon_capable()) return -EPERM; + file->private_data = NULL; + mutex_lock(&cfset_ctrset_mutex); - if (atomic_inc_return(&cfset_opencnt) == 1) - cfset_session_init(); + if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */ + rc = cpum_cf_alloc(-1); + if (!rc) { + cfset_session_init(); + refcount_set(&cfset_opencnt, 1); + } + } mutex_unlock(&cfset_ctrset_mutex); - cpumf_hw_inuse(); - file->private_data = NULL; /* nonseekable_open() never fails */ - return nonseekable_open(inode, file); + return rc ?: nonseekable_open(inode, file); } static int cfset_all_start(struct cfset_request *req) @@ -1280,7 +1497,7 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask) ctrset_read = (struct s390_ctrset_read __user *)arg; uptr = ctrset_read->data; for_each_cpu(cpu, mask) { - struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu); + struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu); struct s390_ctrset_cpudata __user *ctrset_cpudata; ctrset_cpudata = uptr; @@ -1324,7 +1541,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, /* Read all counter sets. */ static void cfset_cpu_read(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int set, set_size; size_t space; @@ -1348,9 +1565,9 @@ static void cfset_cpu_read(void *parm) cpuhw->used += space; cpuhw->sets += 1; } + debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, + cpuhw->sets, cpuhw->used); } - debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, - cpuhw->sets, cpuhw->used); } static int cfset_all_read(unsigned long arg, struct cfset_request *req) @@ -1502,6 +1719,7 @@ static struct miscdevice cfset_dev = { .name = S390_HWCTR_DEVICE, .minor = MISC_DYNAMIC_MINOR, .fops = &cfset_fops, + .mode = 0666, }; /* Hotplug add of a CPU. Scan through all active processes and add @@ -1512,7 +1730,6 @@ static int cfset_online_cpu(unsigned int cpu) struct cfset_call_on_cpu_parm p; struct cfset_request *rp; - mutex_lock(&cfset_ctrset_mutex); if (!list_empty(&cfset_session.head)) { list_for_each_entry(rp, &cfset_session.head, node) { p.sets = rp->ctrset; @@ -1520,19 +1737,18 @@ static int cfset_online_cpu(unsigned int cpu) cpumask_set_cpu(cpu, &rp->mask); } } - mutex_unlock(&cfset_ctrset_mutex); return 0; } /* Hotplug remove of a CPU. Scan through all active processes and clear * that CPU from the list of CPUs supplied with ioctl(..., START, ...). + * Adjust reference counts. */ static int cfset_offline_cpu(unsigned int cpu) { struct cfset_call_on_cpu_parm p; struct cfset_request *rp; - mutex_lock(&cfset_ctrset_mutex); if (!list_empty(&cfset_session.head)) { list_for_each_entry(rp, &cfset_session.head, node) { p.sets = rp->ctrset; @@ -1540,7 +1756,6 @@ static int cfset_offline_cpu(unsigned int cpu) cpumask_clear_cpu(cpu, &rp->mask); } } - mutex_unlock(&cfset_ctrset_mutex); return 0; } @@ -1618,7 +1833,8 @@ static int cfdiag_event_init(struct perf_event *event) } /* Initialize for using the CPU-measurement counter facility */ - cpumf_hw_inuse(); + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7ef72f5ff52e..8ecfbce4ac92 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1271,16 +1271,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } } -static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) -{ - asm volatile( - " cdsg %[old],%[new],%[ptr]\n" - : [old] "+d" (old), [ptr] "+QS" (*ptr) - : [new] "d" (new) - : "memory", "cc"); - return old; -} - /* hw_perf_event_update() - Process sampling buffer * @event: The perf event * @flush_all: Flag to also flush partially filled sample-data-blocks @@ -1352,7 +1342,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) new.f = 0; new.a = 1; new.overflow = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); /* Advance to next sample-data-block */ @@ -1562,7 +1552,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, } new.a = 1; new.overflow = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); return true; } @@ -1636,7 +1626,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, new.a = 1; else new.a = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); *overflow += orig_overflow; } diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index a7b339c4fd7c..fe7d1774ded1 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -36,7 +36,7 @@ struct paicrypt_map { unsigned long *page; /* Page for CPU to store counters */ struct pai_userdata *save; /* Page to store no-zero counters */ unsigned int active_events; /* # of PAI crypto users */ - unsigned int refcnt; /* Reference count mapped buffers */ + refcount_t refcnt; /* Reference count mapped buffers */ enum paievt_mode mode; /* Type of event */ struct perf_event *event; /* Perf event for sampling */ }; @@ -57,10 +57,11 @@ static void paicrypt_event_destroy(struct perf_event *event) static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" - " mode %d refcnt %d\n", __func__, + " mode %d refcnt %u\n", __func__, event->attr.config, event->cpu, - cpump->active_events, cpump->mode, cpump->refcnt); - if (!--cpump->refcnt) { + cpump->active_events, cpump->mode, + refcount_read(&cpump->refcnt)); + if (refcount_dec_and_test(&cpump->refcnt)) { debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", __func__, (unsigned long)cpump->page, cpump->save); @@ -149,8 +150,10 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) /* Allocate memory for counter page and counter extraction. * Only the first counting event has to allocate a page. */ - if (cpump->page) + if (cpump->page) { + refcount_inc(&cpump->refcnt); goto unlock; + } rc = -ENOMEM; cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); @@ -164,18 +167,18 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) goto unlock; } rc = 0; + refcount_set(&cpump->refcnt, 1); unlock: /* If rc is non-zero, do not set mode and reference count */ if (!rc) { - cpump->refcnt++; cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING; } debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d" - " mode %d refcnt %d page %#lx save %p rc %d\n", + " mode %d refcnt %u page %#lx save %p rc %d\n", __func__, a->sample_period, cpump->active_events, - cpump->mode, cpump->refcnt, + cpump->mode, refcount_read(&cpump->refcnt), (unsigned long)cpump->page, cpump->save, rc); mutex_unlock(&pai_reserve_mutex); return rc; diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index fcea307d7529..3b4f384f77f7 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -50,7 +50,7 @@ struct paiext_map { struct pai_userdata *save; /* Area to store non-zero counters */ enum paievt_mode mode; /* Type of event */ unsigned int active_events; /* # of PAI Extension users */ - unsigned int refcnt; + refcount_t refcnt; struct perf_event *event; /* Perf event for sampling */ struct paiext_cb *paiext_cb; /* PAI extension control block area */ }; @@ -60,14 +60,14 @@ struct paiext_mapptr { }; static struct paiext_root { /* Anchor to per CPU data */ - int refcnt; /* Overall active events */ + refcount_t refcnt; /* Overall active events */ struct paiext_mapptr __percpu *mapptr; } paiext_root; /* Free per CPU data when the last event is removed. */ static void paiext_root_free(void) { - if (!--paiext_root.refcnt) { + if (refcount_dec_and_test(&paiext_root.refcnt)) { free_percpu(paiext_root.mapptr); paiext_root.mapptr = NULL; } @@ -80,7 +80,7 @@ static void paiext_root_free(void) */ static int paiext_root_alloc(void) { - if (++paiext_root.refcnt == 1) { + if (!refcount_inc_not_zero(&paiext_root.refcnt)) { /* The memory is already zeroed. */ paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); if (!paiext_root.mapptr) { @@ -91,6 +91,7 @@ static int paiext_root_alloc(void) */ return -ENOMEM; } + refcount_set(&paiext_root.refcnt, 1); } return 0; } @@ -122,7 +123,7 @@ static void paiext_event_destroy(struct perf_event *event) mutex_lock(&paiext_reserve_mutex); cpump->event = NULL; - if (!--cpump->refcnt) /* Last reference gone */ + if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); mutex_unlock(&paiext_reserve_mutex); @@ -163,7 +164,7 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) rc = -ENOMEM; cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); if (!cpump) - goto unlock; + goto undo; /* Allocate memory for counter area and counter extraction. * These are @@ -183,8 +184,9 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) GFP_KERNEL); if (!cpump->save || !cpump->area || !cpump->paiext_cb) { paiext_free(mp); - goto unlock; + goto undo; } + refcount_set(&cpump->refcnt, 1); cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING; } else { @@ -195,15 +197,15 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) if (cpump->mode == PAI_MODE_SAMPLING || (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { rc = -EBUSY; - goto unlock; + goto undo; } + refcount_inc(&cpump->refcnt); } rc = 0; cpump->event = event; - ++cpump->refcnt; -unlock: +undo: if (rc) { /* Error in allocation of event, decrement anchor. Since * the event in not created, its destroy() function is never @@ -211,6 +213,7 @@ unlock: */ paiext_root_free(); } +unlock: mutex_unlock(&paiext_reserve_mutex); /* If rc is non-zero, no increment of counter/sampler was done. */ return rc; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 6b7b6d5e3632..276278199c44 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -102,6 +102,11 @@ void __init time_early_init(void) ((long) qui.old_leap * 4096000000L); } +unsigned long long noinstr sched_clock_noinstr(void) +{ + return tod_to_ns(__get_tod_clock_monotonic()); +} + /* * Scheduler clock - returns current time in nanosec units. */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 9fd19530c9a5..68adf1de8888 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -95,7 +95,7 @@ out: static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) { static cpumask_t mask; - int i; + unsigned int max_cpu; cpumask_clear(&mask); if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) @@ -104,9 +104,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) if (topology_mode != TOPOLOGY_MODE_HW) goto out; cpu -= cpu % (smp_cpu_mtid + 1); - for (i = 0; i <= smp_cpu_mtid; i++) { - if (cpumask_test_cpu(cpu + i, &cpu_setup_mask)) - cpumask_set_cpu(cpu + i, &mask); + max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); + for (; cpu <= max_cpu; cpu++) { + if (cpumask_test_cpu(cpu, &cpu_setup_mask)) + cpumask_set_cpu(cpu, &mask); } out: cpumask_copy(dst, &mask); @@ -123,25 +124,26 @@ static void add_cpus_to_mask(struct topology_core *tl_core, unsigned int core; for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { - unsigned int rcore; - int lcpu, i; + unsigned int max_cpu, rcore; + int cpu; rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; - lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); - if (lcpu < 0) + cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); + if (cpu < 0) continue; - for (i = 0; i <= smp_cpu_mtid; i++) { - topo = &cpu_topology[lcpu + i]; + max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); + for (; cpu <= max_cpu; cpu++) { + topo = &cpu_topology[cpu]; topo->drawer_id = drawer->id; topo->book_id = book->id; topo->socket_id = socket->id; topo->core_id = rcore; - topo->thread_id = lcpu + i; + topo->thread_id = cpu; topo->dedicated = tl_core->d; - cpumask_set_cpu(lcpu + i, &drawer->mask); - cpumask_set_cpu(lcpu + i, &book->mask); - cpumask_set_cpu(lcpu + i, &socket->mask); - smp_cpu_set_polarization(lcpu + i, tl_core->pp); + cpumask_set_cpu(cpu, &drawer->mask); + cpumask_set_cpu(cpu, &book->mask); + cpumask_set_cpu(cpu, &socket->mask); + smp_cpu_set_polarization(cpu, tl_core->pp); } } } diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 580d2e3265cb..7c50eca85ca4 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,7 @@ # Makefile for s390-specific library files.. # -lib-y += delay.o string.o uaccess.o find.o spinlock.o +lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S new file mode 100644 index 000000000000..de33cf02cfd2 --- /dev/null +++ b/arch/s390/lib/tishift.S @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/nospec-insn.h> +#include <asm/export.h> + + .section .noinstr.text, "ax" + + GEN_BR_THUNK %r14 + +SYM_FUNC_START(__ashlti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + srlg %r3,%r1,0(%r3) + sllg %r0,%r0,0(%r4) + sllg %r1,%r1,0(%r4) + ogr %r0,%r3 + j 1f +0: sllg %r0,%r1,-64(%r4) + lghi %r1,0 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) + +SYM_FUNC_START(__ashrti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + sllg %r3,%r0,0(%r3) + srlg %r1,%r1,0(%r4) + srag %r0,%r0,0(%r4) + ogr %r1,%r3 + j 1f +0: srag %r1,%r0,-64(%r4) + srag %r0,%r0,63 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__lshrti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + sllg %r3,%r0,0(%r3) + srlg %r1,%r1,0(%r4) + srlg %r0,%r0,0(%r4) + ogr %r1,%r3 + j 1f +0: srlg %r1,%r0,-64(%r4) + lghi %r0,0 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 5ba3bd8a7b12..ca5a418c58a8 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -4,6 +4,7 @@ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ #include <linux/hugetlb.h> +#include <linux/proc_fs.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <asm/cacheflush.h> diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 5b22c6e24528..b9dcb4ae6c59 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -667,7 +667,15 @@ static void __init memblock_region_swap(void *a, void *b, int size) #ifdef CONFIG_KASAN #define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static inline int set_memory_kasan(unsigned long start, unsigned long end) +{ + start = PAGE_ALIGN_DOWN(__sha(start)); + end = PAGE_ALIGN(__sha(end)); + return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT); +} #endif + /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug @@ -737,10 +745,8 @@ void __init vmem_map_init(void) } #ifdef CONFIG_KASAN - for_each_mem_range(i, &base, &end) { - set_memory_rwnx(__sha(base), - (__sha(end) - __sha(base)) >> PAGE_SHIFT); - } + for_each_mem_range(i, &base, &end) + set_memory_kasan(base, end); #endif set_memory_rox((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT); diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index cf14740abd1c..4e930f566878 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -26,6 +26,7 @@ KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += -fno-stack-protector +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 9652d367fc37..e339745f62a1 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -6,6 +6,7 @@ config SUPERH select ARCH_ENABLE_MEMORY_HOTREMOVE if SPARSEMEM && MMU select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index 059791fd394f..cf1c10f15528 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -71,6 +71,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -78,6 +83,10 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h index 7665de9d00d0..b4090cc35493 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -55,6 +55,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op) \ ATOMIC_OP(op, c_op) \ @@ -64,6 +69,10 @@ ATOMIC_OPS(and, &=) ATOMIC_OPS(or, |=) ATOMIC_OPS(xor, ^=) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index b63dcfbfa14e..9ef1fb1dd12e 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -73,6 +73,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -80,6 +85,10 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index 528bfeda78f5..7a18cb2a1c1a 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -30,9 +30,6 @@ #include <asm/atomic-irq.h> #endif -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) -#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n))) - #endif /* CONFIG_CPU_J2 */ #endif /* __ASM_SH_ATOMIC_H */ diff --git a/arch/sh/include/asm/bugs.h b/arch/sh/include/asm/bugs.h deleted file mode 100644 index fe52abb69cea..000000000000 --- a/arch/sh/include/asm/bugs.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SH_BUGS_H -#define __ASM_SH_BUGS_H - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -/* - * I don't know of any Super-H bugs yet. - */ - -#include <asm/processor.h> - -extern void select_idle_routine(void); - -static void __init check_bugs(void) -{ - extern unsigned long loops_per_jiffy; - char *p = &init_utsname()->machine[2]; /* "sh" */ - - select_idle_routine(); - - current_cpu_data.loops_per_jiffy = loops_per_jiffy; - - switch (current_cpu_data.family) { - case CPU_FAMILY_SH2: - *p++ = '2'; - break; - case CPU_FAMILY_SH2A: - *p++ = '2'; - *p++ = 'a'; - break; - case CPU_FAMILY_SH3: - *p++ = '3'; - break; - case CPU_FAMILY_SH4: - *p++ = '4'; - break; - case CPU_FAMILY_SH4A: - *p++ = '4'; - *p++ = 'a'; - break; - case CPU_FAMILY_SH4AL_DSP: - *p++ = '4'; - *p++ = 'a'; - *p++ = 'l'; - *p++ = '-'; - *p++ = 'd'; - *p++ = 's'; - *p++ = 'p'; - break; - case CPU_FAMILY_UNKNOWN: - /* - * Specifically use CPU_FAMILY_UNKNOWN rather than - * default:, so we're able to have the compiler whine - * about unhandled enumerations. - */ - break; - } - - printk("CPU: %s\n", get_cpu_subtype(¤t_cpu_data)); - -#ifndef __LITTLE_ENDIAN__ - /* 'eb' means 'Endian Big' */ - *p++ = 'e'; - *p++ = 'b'; -#endif - *p = '\0'; -} -#endif /* __ASM_SH_BUGS_H */ diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 85a6c1c3c16e..73fba7c922f9 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -166,6 +166,8 @@ extern unsigned int instruction_size(unsigned int insn); #define instruction_size(insn) (2) #endif +void select_idle_routine(void); + #endif /* __ASSEMBLY__ */ #include <asm/processor_32.h> diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index d662503b0665..045d93f151fd 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -15,6 +15,7 @@ #include <linux/irqflags.h> #include <linux/smp.h> #include <linux/atomic.h> +#include <asm/processor.h> #include <asm/smp.h> #include <asm/bl_bit.h> diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index e4f0f9a1d355..b3da2757faaf 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -43,6 +43,7 @@ #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/mmzone.h> +#include <asm/processor.h> #include <asm/sparsemem.h> #include <asm/platform_early.h> @@ -354,3 +355,57 @@ int test_mode_pin(int pin) { return sh_mv.mv_mode_pins() & pin; } + +void __init arch_cpu_finalize_init(void) +{ + char *p = &init_utsname()->machine[2]; /* "sh" */ + + select_idle_routine(); + + current_cpu_data.loops_per_jiffy = loops_per_jiffy; + + switch (current_cpu_data.family) { + case CPU_FAMILY_SH2: + *p++ = '2'; + break; + case CPU_FAMILY_SH2A: + *p++ = '2'; + *p++ = 'a'; + break; + case CPU_FAMILY_SH3: + *p++ = '3'; + break; + case CPU_FAMILY_SH4: + *p++ = '4'; + break; + case CPU_FAMILY_SH4A: + *p++ = '4'; + *p++ = 'a'; + break; + case CPU_FAMILY_SH4AL_DSP: + *p++ = '4'; + *p++ = 'a'; + *p++ = 'l'; + *p++ = '-'; + *p++ = 'd'; + *p++ = 's'; + *p++ = 'p'; + break; + case CPU_FAMILY_UNKNOWN: + /* + * Specifically use CPU_FAMILY_UNKNOWN rather than + * default:, so we're able to have the compiler whine + * about unhandled enumerations. + */ + break; + } + + pr_info("CPU: %s\n", get_cpu_subtype(¤t_cpu_data)); + +#ifndef __LITTLE_ENDIAN__ + /* 'eb' means 'Endian Big' */ + *p++ = 'e'; + *p++ = 'b'; +#endif + *p = '\0'; +} diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 8535e19062f6..36fd488ccbfa 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -52,6 +52,7 @@ config SPARC config SPARC32 def_bool !64BIT select ARCH_32BIT_OFF_T + select ARCH_HAS_CPU_FINALIZE_INIT if !SMP select ARCH_HAS_SYNC_DMA_FOR_CPU select CLZ_TAB select DMA_DIRECT_REMAP diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index d775daa83d12..60ce2fe57fcd 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -19,17 +19,31 @@ #include <asm-generic/atomic64.h> int arch_atomic_add_return(int, atomic_t *); +#define arch_atomic_add_return arch_atomic_add_return + int arch_atomic_fetch_add(int, atomic_t *); +#define arch_atomic_fetch_add arch_atomic_fetch_add + int arch_atomic_fetch_and(int, atomic_t *); +#define arch_atomic_fetch_and arch_atomic_fetch_and + int arch_atomic_fetch_or(int, atomic_t *); +#define arch_atomic_fetch_or arch_atomic_fetch_or + int arch_atomic_fetch_xor(int, atomic_t *); +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + int arch_atomic_cmpxchg(atomic_t *, int, int); +#define arch_atomic_cmpxchg arch_atomic_cmpxchg + int arch_atomic_xchg(atomic_t *, int); -int arch_atomic_fetch_add_unless(atomic_t *, int, int); -void arch_atomic_set(atomic_t *, int); +#define arch_atomic_xchg arch_atomic_xchg +int arch_atomic_fetch_add_unless(atomic_t *, int, int); #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless +void arch_atomic_set(atomic_t *, int); + #define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) #define arch_atomic_read(v) READ_ONCE((v)->counter) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 077891686715..a5e9c37605a7 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -37,6 +37,16 @@ s64 arch_atomic64_fetch_##op(s64, atomic64_t *); ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + +#define arch_atomic64_add_return arch_atomic64_add_return +#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_fetch_add arch_atomic64_fetch_add +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -44,22 +54,19 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + +#define arch_atomic64_fetch_and arch_atomic64_fetch_and +#define arch_atomic64_fetch_or arch_atomic64_fetch_or +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n))) - -static inline int arch_atomic_xchg(atomic_t *v, int new) -{ - return arch_xchg(&v->counter, new); -} - -#define arch_atomic64_cmpxchg(v, o, n) \ - ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) - s64 arch_atomic64_dec_if_positive(atomic64_t *v); #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive diff --git a/arch/sparc/include/asm/bugs.h b/arch/sparc/include/asm/bugs.h deleted file mode 100644 index 02fa369b9c21..000000000000 --- a/arch/sparc/include/asm/bugs.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* include/asm/bugs.h: Sparc probes for various bugs. - * - * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net) - */ - -#ifdef CONFIG_SPARC32 -#include <asm/cpudata.h> -#endif - -extern unsigned long loops_per_jiffy; - -static void __init check_bugs(void) -{ -#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP) - cpu_data(0).udelay_val = loops_per_jiffy; -#endif -} diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index ace0e9adfd77..1adf5c1c16b8 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -412,3 +412,10 @@ static int __init topology_init(void) } subsys_initcall(topology_init); + +#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP) +void __init arch_cpu_finalize_init(void) +{ + cpu_data(0).udelay_val = loops_per_jiffy; +} +#endif diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 541a9b18e343..b5e179360534 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -5,7 +5,7 @@ menu "UML-specific options" config UML bool default y - select ARCH_EPHEMERAL_INODES + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index dee6f66353b3..a461a950f051 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -16,7 +16,8 @@ mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o ubd-objs := ubd_kern.o ubd_user.o port-objs := port_kern.o port_user.o -harddog-objs := harddog_kern.o harddog_user.o +harddog-objs := harddog_kern.o +harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o rtc-objs := rtc_kern.o rtc_user.o LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a) @@ -60,6 +61,7 @@ obj-$(CONFIG_PTY_CHAN) += pty.o obj-$(CONFIG_TTY_CHAN) += tty.o obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o obj-$(CONFIG_UML_WATCHDOG) += harddog.o +obj-y += $(harddog-builtin-y) $(harddog-builtin-m) obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o diff --git a/arch/um/drivers/harddog.h b/arch/um/drivers/harddog.h new file mode 100644 index 000000000000..6d9ea60e7133 --- /dev/null +++ b/arch/um/drivers/harddog.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef UM_WATCHDOG_H +#define UM_WATCHDOG_H + +int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); +void stop_watchdog(int in_fd, int out_fd); +int ping_watchdog(int fd); + +#endif /* UM_WATCHDOG_H */ diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index e6d4f43deba8..60d1c6cab8a9 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -47,6 +47,7 @@ #include <linux/spinlock.h> #include <linux/uaccess.h> #include "mconsole.h" +#include "harddog.h" MODULE_LICENSE("GPL"); @@ -60,8 +61,6 @@ static int harddog_out_fd = -1; * Allow only one person to hold it open */ -extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); - static int harddog_open(struct inode *inode, struct file *file) { int err = -EBUSY; @@ -92,8 +91,6 @@ err: return err; } -extern void stop_watchdog(int in_fd, int out_fd); - static int harddog_release(struct inode *inode, struct file *file) { /* @@ -112,8 +109,6 @@ static int harddog_release(struct inode *inode, struct file *file) return 0; } -extern int ping_watchdog(int fd); - static ssize_t harddog_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c index 070468d22e39..9ed89304975e 100644 --- a/arch/um/drivers/harddog_user.c +++ b/arch/um/drivers/harddog_user.c @@ -7,6 +7,7 @@ #include <unistd.h> #include <errno.h> #include <os.h> +#include "harddog.h" struct dog_data { int stdin_fd; diff --git a/arch/um/drivers/harddog_user_exp.c b/arch/um/drivers/harddog_user_exp.c new file mode 100644 index 000000000000..c74d4b815d14 --- /dev/null +++ b/arch/um/drivers/harddog_user_exp.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include "harddog.h" + +#if IS_MODULE(CONFIG_UML_WATCHDOG) +EXPORT_SYMBOL(start_watchdog); +EXPORT_SYMBOL(stop_watchdog); +EXPORT_SYMBOL(ping_watchdog); +#endif diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index f4c1e6e97ad5..50206feac577 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -108,9 +108,9 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data) static DEFINE_MUTEX(ubd_lock); static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */ -static int ubd_open(struct block_device *bdev, fmode_t mode); -static void ubd_release(struct gendisk *disk, fmode_t mode); -static int ubd_ioctl(struct block_device *bdev, fmode_t mode, +static int ubd_open(struct gendisk *disk, blk_mode_t mode); +static void ubd_release(struct gendisk *disk); +static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); @@ -1154,9 +1154,8 @@ static int __init ubd_driver_init(void){ device_initcall(ubd_driver_init); -static int ubd_open(struct block_device *bdev, fmode_t mode) +static int ubd_open(struct gendisk *disk, blk_mode_t mode) { - struct gendisk *disk = bdev->bd_disk; struct ubd *ubd_dev = disk->private_data; int err = 0; @@ -1171,19 +1170,12 @@ static int ubd_open(struct block_device *bdev, fmode_t mode) } ubd_dev->count++; set_disk_ro(disk, !ubd_dev->openflags.w); - - /* This should no more be needed. And it didn't work anyway to exclude - * read-write remounting of filesystems.*/ - /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){ - if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev); - err = -EROFS; - }*/ out: mutex_unlock(&ubd_mutex); return err; } -static void ubd_release(struct gendisk *disk, fmode_t mode) +static void ubd_release(struct gendisk *disk) { struct ubd *ubd_dev = disk->private_data; @@ -1397,7 +1389,7 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static int ubd_ioctl(struct block_device *bdev, fmode_t mode, +static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct ubd *ubd_dev = bdev->bd_disk->private_data; diff --git a/arch/um/include/asm/bugs.h b/arch/um/include/asm/bugs.h deleted file mode 100644 index 4473942a0839..000000000000 --- a/arch/um/include/asm/bugs.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_BUGS_H -#define __UM_BUGS_H - -void check_bugs(void); - -#endif diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 0a23a98d4ca0..918fed7ad4d8 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -3,6 +3,7 @@ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ +#include <linux/cpu.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/mm.h> @@ -430,7 +431,7 @@ void __init setup_arch(char **cmdline_p) } } -void __init check_bugs(void) +void __init arch_cpu_finalize_init(void) { arch_check_bugs(); os_check_bugs(); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 53bab123a8ee..d5c69145a73c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -71,6 +71,7 @@ config X86 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE @@ -274,7 +275,9 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO + select HOTPLUG_PARALLEL if SMP && X86_64 select HOTPLUG_SMT if SMP + select HOTPLUG_SPLIT_STARTUP if SMP && X86_32 select IRQ_FORCED_THREADING select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK @@ -291,7 +294,6 @@ config X86 select TRACE_IRQFLAGS_NMI_SUPPORT select USER_STACKTRACE_SUPPORT select HAVE_ARCH_KCSAN if X86_64 - select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16 @@ -441,17 +443,6 @@ config SMP If you don't know what to do here, say N. -config X86_FEATURE_NAMES - bool "Processor feature human-readable names" if EMBEDDED - default y - help - This option compiles in a table of x86 feature bits and corresponding - names. This is required to support /proc/cpuinfo and a few kernel - messages. You can disable this to save space, at the expense of - making those few kernel messages show numeric feature bits instead. - - If in doubt, say Y. - config X86_X2APIC bool "Support x2apic" depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) @@ -884,9 +875,11 @@ config INTEL_TDX_GUEST bool "Intel TDX (Trust Domain Extensions) - Guest Support" depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC + depends on EFI_STUB select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE + select UNACCEPTED_MEMORY help Support running as a guest under Intel TDX. Without this support, the guest kernel can not boot or run under TDX. @@ -1541,11 +1534,13 @@ config X86_MEM_ENCRYPT config AMD_MEM_ENCRYPT bool "AMD Secure Memory Encryption (SME) support" depends on X86_64 && CPU_SUP_AMD + depends on EFI_STUB select DMA_COHERENT_POOL select ARCH_USE_MEMREMAP_PROT select INSTRUCTION_DECODER select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT + select UNACCEPTED_MEMORY help Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory @@ -2305,49 +2300,6 @@ config HOTPLUG_CPU def_bool y depends on SMP -config BOOTPARAM_HOTPLUG_CPU0 - bool "Set default setting of cpu0_hotpluggable" - depends on HOTPLUG_CPU - help - Set whether default state of cpu0_hotpluggable is on or off. - - Say Y here to enable CPU0 hotplug by default. If this switch - is turned on, there is no need to give cpu0_hotplug kernel - parameter and the CPU0 hotplug feature is enabled by default. - - Please note: there are two known CPU0 dependencies if you want - to enable the CPU0 hotplug feature either by this switch or by - cpu0_hotplug kernel parameter. - - First, resume from hibernate or suspend always starts from CPU0. - So hibernate and suspend are prevented if CPU0 is offline. - - Second dependency is PIC interrupts always go to CPU0. CPU0 can not - offline if any interrupt can not migrate out of CPU0. There may - be other CPU0 dependencies. - - Please make sure the dependencies are under your control before - you enable this feature. - - Say N if you don't want to enable CPU0 hotplug feature by default. - You still can enable the CPU0 hotplug feature at boot by kernel - parameter cpu0_hotplug. - -config DEBUG_HOTPLUG_CPU0 - def_bool n - prompt "Debug CPU0 hotplug" - depends on HOTPLUG_CPU - help - Enabling this option offlines CPU0 (if CPU0 can be offlined) as - soon as possible and boots up userspace with CPU0 offlined. User - can online CPU0 back after boot time. - - To debug CPU0 hotplug, you need to enable CPU0 offline/online - feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during - compilation or giving cpu0_hotplug kernel parameter at boot. - - If unsure, say N. - config COMPAT_VDSO def_bool n prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 542377cd419d..00468adf180f 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -389,7 +389,7 @@ config IA32_FEAT_CTL config X86_VMX_FEATURE_NAMES def_bool y - depends on IA32_FEAT_CTL && X86_FEATURE_NAMES + depends on IA32_FEAT_CTL menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EXPERT diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b39975977c03..fdc2e3abd615 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -305,6 +305,18 @@ ifeq ($(RETPOLINE_CFLAGS),) endif endif +ifdef CONFIG_UNWINDER_ORC +orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h +orc_hash_sh := $(srctree)/scripts/orc_hash.sh +targets += $(orc_hash_h) +quiet_cmd_orc_hash = GEN $@ + cmd_orc_hash = mkdir -p $(dir $@); \ + $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@ +$(orc_hash_h): $(srctree)/arch/x86/include/asm/orc_types.h $(orc_hash_sh) FORCE + $(call if_changed,orc_hash) +archprepare: $(orc_hash_h) +endif + archclean: $(Q)rm -rf $(objtree)/arch/i386 $(Q)rm -rf $(objtree)/arch/x86_64 diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink new file mode 100644 index 000000000000..936093d29160 --- /dev/null +++ b/arch/x86/Makefile.postlink @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 +# =========================================================================== +# Post-link x86 pass +# =========================================================================== +# +# 1. Separate relocations from vmlinux into vmlinux.relocs. +# 2. Strip relocations from vmlinux. + +PHONY := __archpost +__archpost: + +-include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include + +CMD_RELOCS = arch/x86/tools/relocs +OUT_RELOCS = arch/x86/boot/compressed +quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/$@.relocs + cmd_relocs = \ + mkdir -p $(OUT_RELOCS); \ + $(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \ + $(CMD_RELOCS) --abs-relocs $@ + +quiet_cmd_strip_relocs = RSTRIP $@ + cmd_strip_relocs = \ + $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ + --remove-section='.rela.*' --remove-section='.rela__*' $@ + +# `@true` prevents complaint when there is nothing to be done + +vmlinux: FORCE + @true +ifeq ($(CONFIG_X86_NEED_RELOCS),y) + $(call cmd,relocs) + $(call cmd,strip_relocs) +endif + +%.ko: FORCE + @true + +clean: + @rm -f $(OUT_RELOCS)/vmlinux.relocs + +PHONY += FORCE clean + +FORCE: + +.PHONY: $(PHONY) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9e38ffaadb5d..f33e45ed1437 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -55,14 +55,12 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ -include include/generated/autoconf.h \ -D__EXPORTED_HEADERS__ -ifdef CONFIG_X86_FEATURE_NAMES $(obj)/cpu.o: $(obj)/cpustr.h quiet_cmd_cpustr = CPUSTR $@ cmd_cpustr = $(obj)/mkcpustr > $@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE $(call if_changed,cpustr) -endif targets += cpustr.h # --------------------------------------------------------------------------- diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6b6cfe607bdb..40d2ff503079 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -106,7 +106,8 @@ ifdef CONFIG_X86_64 endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o -vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o +vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o $(obj)/tdx-shared.o +vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o @@ -121,11 +122,9 @@ $(obj)/vmlinux.bin: vmlinux FORCE targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs -CMD_RELOCS = arch/x86/tools/relocs -quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< -$(obj)/vmlinux.relocs: vmlinux FORCE - $(call if_changed,relocs) +# vmlinux.relocs is created by the vmlinux postlink step. +$(obj)/vmlinux.relocs: vmlinux + @true vmlinux.bin.all-y := $(obj)/vmlinux.bin vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h index 7db2f41b54cd..866c0af8b5b9 100644 --- a/arch/x86/boot/compressed/efi.h +++ b/arch/x86/boot/compressed/efi.h @@ -16,6 +16,7 @@ typedef guid_t efi_guid_t __aligned(__alignof__(u32)); #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42) +#define LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID EFI_GUID(0xd5d1de3c, 0x105c, 0x44f9, 0x9e, 0xa9, 0xbc, 0xef, 0x98, 0x12, 0x00, 0x31) #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" @@ -32,6 +33,7 @@ typedef struct { } efi_table_hdr_t; #define EFI_CONVENTIONAL_MEMORY 7 +#define EFI_UNACCEPTED_MEMORY 15 #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ @@ -104,6 +106,14 @@ struct efi_setup_data { u64 reserved[8]; }; +struct efi_unaccepted_memory { + u32 version; + u32 unit_size; + u64 phys_base; + u64 size; + unsigned long bitmap[]; +}; + static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right) { return memcmp(&left, &right, sizeof (efi_guid_t)); diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c index c881878e56d3..5313c5cb2b80 100644 --- a/arch/x86/boot/compressed/error.c +++ b/arch/x86/boot/compressed/error.c @@ -22,3 +22,22 @@ void error(char *m) while (1) asm("hlt"); } + +/* EFI libstub provides vsnprintf() */ +#ifdef CONFIG_EFI_STUB +void panic(const char *fmt, ...) +{ + static char buf[1024]; + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + if (len && buf[len - 1] == '\n') + buf[len - 1] = '\0'; + + error(buf); +} +#endif diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h index 1de5821184f1..86fe33b93715 100644 --- a/arch/x86/boot/compressed/error.h +++ b/arch/x86/boot/compressed/error.h @@ -6,5 +6,6 @@ void warn(char *m); void error(char *m) __noreturn; +void panic(const char *fmt, ...) __noreturn __cold; #endif /* BOOT_COMPRESSED_ERROR_H */ diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 454757fbdfe5..9193acf0e9cd 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -672,6 +672,33 @@ static bool process_mem_region(struct mem_vector *region, } #ifdef CONFIG_EFI + +/* + * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are + * guaranteed to be free. + * + * Pick free memory more conservatively than the EFI spec allows: according to + * the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory and thus + * available to place the kernel image into, but in practice there's firmware + * where using that memory leads to crashes. Buggy vendor EFI code registers + * for an event that triggers on SetVirtualAddressMap(). The handler assumes + * that EFI_BOOT_SERVICES_DATA memory has not been touched by loader yet, which + * is probably true for Windows. + * + * Preserve EFI_BOOT_SERVICES_* regions until after SetVirtualAddressMap(). + */ +static inline bool memory_type_is_free(efi_memory_desc_t *md) +{ + if (md->type == EFI_CONVENTIONAL_MEMORY) + return true; + + if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) && + md->type == EFI_UNACCEPTED_MEMORY) + return true; + + return false; +} + /* * Returns true if we processed the EFI memmap, which we prefer over the E820 * table if it is available. @@ -716,18 +743,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size) for (i = 0; i < nr_desc; i++) { md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); - /* - * Here we are more conservative in picking free memory than - * the EFI spec allows: - * - * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also - * free memory and thus available to place the kernel image into, - * but in practice there's firmware where using that memory leads - * to crashes. - * - * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free. - */ - if (md->type != EFI_CONVENTIONAL_MEMORY) + if (!memory_type_is_free(md)) continue; if (efi_soft_reserve_enabled() && diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c new file mode 100644 index 000000000000..3c1609245f2a --- /dev/null +++ b/arch/x86/boot/compressed/mem.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "error.h" +#include "misc.h" +#include "tdx.h" +#include "sev.h" +#include <asm/shared/tdx.h> + +/* + * accept_memory() and process_unaccepted_memory() called from EFI stub which + * runs before decompresser and its early_tdx_detect(). + * + * Enumerate TDX directly from the early users. + */ +static bool early_is_tdx_guest(void) +{ + static bool once; + static bool is_tdx; + + if (!IS_ENABLED(CONFIG_INTEL_TDX_GUEST)) + return false; + + if (!once) { + u32 eax, sig[3]; + + cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, + &sig[0], &sig[2], &sig[1]); + is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig)); + once = true; + } + + return is_tdx; +} + +void arch_accept_memory(phys_addr_t start, phys_addr_t end) +{ + /* Platform-specific memory-acceptance call goes here */ + if (early_is_tdx_guest()) { + if (!tdx_accept_memory(start, end)) + panic("TDX: Failed to accept memory\n"); + } else if (sev_snp_enabled()) { + snp_accept_memory(start, end); + } else { + error("Cannot accept memory: unknown platform\n"); + } +} + +bool init_unaccepted_memory(void) +{ + guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID; + struct efi_unaccepted_memory *table; + unsigned long cfg_table_pa; + unsigned int cfg_table_len; + enum efi_type et; + int ret; + + et = efi_get_type(boot_params); + if (et == EFI_TYPE_NONE) + return false; + + ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len); + if (ret) { + warn("EFI config table not found."); + return false; + } + + table = (void *)efi_find_vendor_table(boot_params, cfg_table_pa, + cfg_table_len, guid); + if (!table) + return false; + + if (table->version != 1) + error("Unknown version of unaccepted memory table\n"); + + /* + * In many cases unaccepted_table is already set by EFI stub, but it + * has to be initialized again to cover cases when the table is not + * allocated by EFI stub or EFI stub copied the kernel image with + * efi_relocate_kernel() before the variable is set. + * + * It must be initialized before the first usage of accept_memory(). + */ + unaccepted_table = table; + + return true; +} diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 014ff222bf4b..94b7abcf624b 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -455,6 +455,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #endif debug_putstr("\nDecompressing Linux... "); + + if (init_unaccepted_memory()) { + debug_putstr("Accepting memory... "); + accept_memory(__pa(output), __pa(output) + needed_size); + } + __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); entry_offset = parse_elf(output); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 2f155a0e3041..964fe903a1cd 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -247,4 +247,14 @@ static inline unsigned long efi_find_vendor_table(struct boot_params *bp, } #endif /* CONFIG_EFI */ +#ifdef CONFIG_UNACCEPTED_MEMORY +bool init_unaccepted_memory(void); +#else +static inline bool init_unaccepted_memory(void) { return false; } +#endif + +/* Defined in EFI stub */ +extern struct efi_unaccepted_memory *unaccepted_table; +void accept_memory(phys_addr_t start, phys_addr_t end); + #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 014b89c89088..09dc8c187b3c 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -115,7 +115,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, /* Include code for early handlers */ #include "../../kernel/sev-shared.c" -static inline bool sev_snp_enabled(void) +bool sev_snp_enabled(void) { return sev_status & MSR_AMD64_SEV_SNP_ENABLED; } @@ -181,6 +181,58 @@ static bool early_setup_ghcb(void) return true; } +static phys_addr_t __snp_accept_memory(struct snp_psc_desc *desc, + phys_addr_t pa, phys_addr_t pa_end) +{ + struct psc_hdr *hdr; + struct psc_entry *e; + unsigned int i; + + hdr = &desc->hdr; + memset(hdr, 0, sizeof(*hdr)); + + e = desc->entries; + + i = 0; + while (pa < pa_end && i < VMGEXIT_PSC_MAX_ENTRY) { + hdr->end_entry = i; + + e->gfn = pa >> PAGE_SHIFT; + e->operation = SNP_PAGE_STATE_PRIVATE; + if (IS_ALIGNED(pa, PMD_SIZE) && (pa_end - pa) >= PMD_SIZE) { + e->pagesize = RMP_PG_SIZE_2M; + pa += PMD_SIZE; + } else { + e->pagesize = RMP_PG_SIZE_4K; + pa += PAGE_SIZE; + } + + e++; + i++; + } + + if (vmgexit_psc(boot_ghcb, desc)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + pvalidate_pages(desc); + + return pa; +} + +void snp_accept_memory(phys_addr_t start, phys_addr_t end) +{ + struct snp_psc_desc desc = {}; + unsigned int i; + phys_addr_t pa; + + if (!boot_ghcb && !early_setup_ghcb()) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + pa = start; + while (pa < end) + pa = __snp_accept_memory(&desc, pa, end); +} + void sev_es_shutdown_ghcb(void) { if (!boot_ghcb) diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h new file mode 100644 index 000000000000..fc725a981b09 --- /dev/null +++ b/arch/x86/boot/compressed/sev.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header for early boot related functions. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#ifndef BOOT_COMPRESSED_SEV_H +#define BOOT_COMPRESSED_SEV_H + +#ifdef CONFIG_AMD_MEM_ENCRYPT + +bool sev_snp_enabled(void); +void snp_accept_memory(phys_addr_t start, phys_addr_t end); + +#else + +static inline bool sev_snp_enabled(void) { return false; } +static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } + +#endif + +#endif diff --git a/arch/x86/boot/compressed/tdx-shared.c b/arch/x86/boot/compressed/tdx-shared.c new file mode 100644 index 000000000000..5ac43762fe13 --- /dev/null +++ b/arch/x86/boot/compressed/tdx-shared.c @@ -0,0 +1,2 @@ +#include "error.h" +#include "../../coco/tdx/tdx-shared.c" diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c index 2d81d3cc72a1..8841b945a1e2 100644 --- a/arch/x86/boot/compressed/tdx.c +++ b/arch/x86/boot/compressed/tdx.c @@ -20,7 +20,7 @@ static inline unsigned int tdx_io_in(int size, u16 port) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, - .r11 = EXIT_REASON_IO_INSTRUCTION, + .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), .r12 = size, .r13 = 0, .r14 = port, @@ -36,7 +36,7 @@ static inline void tdx_io_out(int size, u16 port, u32 value) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, - .r11 = EXIT_REASON_IO_INSTRUCTION, + .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), .r12 = size, .r13 = 1, .r14 = port, diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 0bbf4f3707d2..feb6dbd7ca86 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -14,9 +14,7 @@ */ #include "boot.h" -#ifdef CONFIG_X86_FEATURE_NAMES #include "cpustr.h" -#endif static char *cpu_name(int level) { @@ -35,7 +33,6 @@ static char *cpu_name(int level) static void show_cap_strs(u32 *err_flags) { int i, j; -#ifdef CONFIG_X86_FEATURE_NAMES const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs; for (i = 0; i < NCAPINTS; i++) { u32 e = err_flags[i]; @@ -58,16 +55,6 @@ static void show_cap_strs(u32 *err_flags) e >>= 1; } } -#else - for (i = 0; i < NCAPINTS; i++) { - u32 e = err_flags[i]; - for (j = 0; j < 32; j++) { - if (e & 1) - printf("%d:%d ", i, j); - e >>= 1; - } - } -#endif } int validate_cpu(void) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 73f83233d25d..eeec9986570e 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -13,10 +13,10 @@ #include <asm/coco.h> #include <asm/processor.h> -enum cc_vendor cc_vendor __ro_after_init; +enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; static u64 cc_mask __ro_after_init; -static bool intel_cc_platform_has(enum cc_attr attr) +static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { case CC_ATTR_GUEST_UNROLL_STRING_IO: @@ -34,7 +34,7 @@ static bool intel_cc_platform_has(enum cc_attr attr) * the other levels of SME/SEV functionality, including C-bit * based SEV-SNP, are not enabled. */ -static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr) +static __maybe_unused __always_inline bool amd_cc_platform_vtom(enum cc_attr attr) { switch (attr) { case CC_ATTR_GUEST_MEM_ENCRYPT: @@ -58,7 +58,7 @@ static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr) * the trampoline area must be encrypted. */ -static bool amd_cc_platform_has(enum cc_attr attr) +static bool noinstr amd_cc_platform_has(enum cc_attr attr) { #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -97,7 +97,7 @@ static bool amd_cc_platform_has(enum cc_attr attr) #endif } -bool cc_platform_has(enum cc_attr attr) +bool noinstr cc_platform_has(enum cc_attr attr) { switch (cc_vendor) { case CC_VENDOR_AMD: diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile index 46c55998557d..2c7dcbf1458b 100644 --- a/arch/x86/coco/tdx/Makefile +++ b/arch/x86/coco/tdx/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += tdx.o tdcall.o +obj-y += tdx.o tdx-shared.o tdcall.o diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c new file mode 100644 index 000000000000..ef20ddc37b58 --- /dev/null +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -0,0 +1,71 @@ +#include <asm/tdx.h> +#include <asm/pgtable.h> + +static unsigned long try_accept_one(phys_addr_t start, unsigned long len, + enum pg_level pg_level) +{ + unsigned long accept_size = page_level_size(pg_level); + u64 tdcall_rcx; + u8 page_size; + + if (!IS_ALIGNED(start, accept_size)) + return 0; + + if (len < accept_size) + return 0; + + /* + * Pass the page physical address to the TDX module to accept the + * pending, private page. + * + * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G. + */ + switch (pg_level) { + case PG_LEVEL_4K: + page_size = 0; + break; + case PG_LEVEL_2M: + page_size = 1; + break; + case PG_LEVEL_1G: + page_size = 2; + break; + default: + return 0; + } + + tdcall_rcx = start | page_size; + if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL)) + return 0; + + return accept_size; +} + +bool tdx_accept_memory(phys_addr_t start, phys_addr_t end) +{ + /* + * For shared->private conversion, accept the page using + * TDX_ACCEPT_PAGE TDX module call. + */ + while (start < end) { + unsigned long len = end - start; + unsigned long accept_size; + + /* + * Try larger accepts first. It gives chance to VMM to keep + * 1G/2M Secure EPT entries where possible and speeds up + * process by cutting number of hypercalls (if successful). + */ + + accept_size = try_accept_one(start, len, PG_LEVEL_1G); + if (!accept_size) + accept_size = try_accept_one(start, len, PG_LEVEL_2M); + if (!accept_size) + accept_size = try_accept_one(start, len, PG_LEVEL_4K); + if (!accept_size) + return false; + start += accept_size; + } + + return true; +} diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index e146b599260f..1d6b863c42b0 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -14,20 +14,6 @@ #include <asm/insn-eval.h> #include <asm/pgtable.h> -/* TDX module Call Leaf IDs */ -#define TDX_GET_INFO 1 -#define TDX_GET_VEINFO 3 -#define TDX_GET_REPORT 4 -#define TDX_ACCEPT_PAGE 6 -#define TDX_WR 8 - -/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */ -#define TDCS_NOTIFY_ENABLES 0x9100000000000010 - -/* TDX hypercall Leaf IDs */ -#define TDVMCALL_MAP_GPA 0x10001 -#define TDVMCALL_REPORT_FATAL_ERROR 0x10003 - /* MMIO direction */ #define EPT_READ 0 #define EPT_WRITE 1 @@ -51,24 +37,6 @@ #define TDREPORT_SUBTYPE_0 0 -/* - * Wrapper for standard use of __tdx_hypercall with no output aside from - * return code. - */ -static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) -{ - struct tdx_hypercall_args args = { - .r10 = TDX_HYPERCALL_STANDARD, - .r11 = fn, - .r12 = r12, - .r13 = r13, - .r14 = r14, - .r15 = r15, - }; - - return __tdx_hypercall(&args); -} - /* Called from __tdx_hypercall() for unrecoverable failure */ noinstr void __tdx_hypercall_failed(void) { @@ -76,17 +44,6 @@ noinstr void __tdx_hypercall_failed(void) panic("TDVMCALL failed. TDX module bug?"); } -/* - * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined - * independently from but are currently matched 1:1 with VMX EXIT_REASONs. - * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and - * guest sides of these calls. - */ -static __always_inline u64 hcall_func(u64 exit_reason) -{ - return exit_reason; -} - #ifdef CONFIG_KVM_GUEST long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4) @@ -745,47 +702,6 @@ static bool tdx_cache_flush_required(void) return true; } -static bool try_accept_one(phys_addr_t *start, unsigned long len, - enum pg_level pg_level) -{ - unsigned long accept_size = page_level_size(pg_level); - u64 tdcall_rcx; - u8 page_size; - - if (!IS_ALIGNED(*start, accept_size)) - return false; - - if (len < accept_size) - return false; - - /* - * Pass the page physical address to the TDX module to accept the - * pending, private page. - * - * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G. - */ - switch (pg_level) { - case PG_LEVEL_4K: - page_size = 0; - break; - case PG_LEVEL_2M: - page_size = 1; - break; - case PG_LEVEL_1G: - page_size = 2; - break; - default: - return false; - } - - tdcall_rcx = *start | page_size; - if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL)) - return false; - - *start += accept_size; - return true; -} - /* * Inform the VMM of the guest's intent for this physical page: shared with * the VMM or private to the guest. The VMM is expected to change its mapping @@ -810,33 +726,34 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0)) return false; - /* private->shared conversion requires only MapGPA call */ - if (!enc) - return true; + /* shared->private conversion requires memory to be accepted before use */ + if (enc) + return tdx_accept_memory(start, end); + + return true; +} +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, + bool enc) +{ /* - * For shared->private conversion, accept the page using - * TDX_ACCEPT_PAGE TDX module call. + * Only handle shared->private conversion here. + * See the comment in tdx_early_init(). */ - while (start < end) { - unsigned long len = end - start; - - /* - * Try larger accepts first. It gives chance to VMM to keep - * 1G/2M SEPT entries where possible and speeds up process by - * cutting number of hypercalls (if successful). - */ - - if (try_accept_one(&start, len, PG_LEVEL_1G)) - continue; - - if (try_accept_one(&start, len, PG_LEVEL_2M)) - continue; - - if (!try_accept_one(&start, len, PG_LEVEL_4K)) - return false; - } + if (enc) + return tdx_enc_status_changed(vaddr, numpages, enc); + return true; +} +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, + bool enc) +{ + /* + * Only handle private->shared conversion here. + * See the comment in tdx_early_init(). + */ + if (!enc) + return tdx_enc_status_changed(vaddr, numpages, enc); return true; } @@ -852,7 +769,7 @@ void __init tdx_early_init(void) setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); - cc_set_vendor(CC_VENDOR_INTEL); + cc_vendor = CC_VENDOR_INTEL; tdx_parse_tdinfo(&cc_mask); cc_set_mask(cc_mask); @@ -867,9 +784,41 @@ void __init tdx_early_init(void) */ physical_mask &= cc_mask - 1; - x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; - x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; - x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed; + /* + * The kernel mapping should match the TDX metadata for the page. + * load_unaligned_zeropad() can touch memory *adjacent* to that which is + * owned by the caller and can catch even _momentary_ mismatches. Bad + * things happen on mismatch: + * + * - Private mapping => Shared Page == Guest shutdown + * - Shared mapping => Private Page == Recoverable #VE + * + * guest.enc_status_change_prepare() converts the page from + * shared=>private before the mapping becomes private. + * + * guest.enc_status_change_finish() converts the page from + * private=>shared after the mapping becomes private. + * + * In both cases there is a temporary shared mapping to a private page, + * which can result in a #VE. But, there is never a private mapping to + * a shared page. + */ + x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish; + + x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; + + /* + * TDX intercepts the RDMSR to read the X2APIC ID in the parallel + * bringup low level code. That raises #VE which cannot be handled + * there. + * + * Intel-TDX has a secure RDMSR hypercall, but that needs to be + * implemented seperately in the low level startup ASM code. + * Until that is in place, disable parallel bringup for TDX. + */ + x86_cpuinit.parallel_bringup = false; pr_info("Guest detected\n"); } diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S index 7c1abc513f34..9556dacd9841 100644 --- a/arch/x86/crypto/aria-aesni-avx-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -773,8 +773,6 @@ .octa 0x3F893781E95FE1576CDA64D2BA0CB204 #ifdef CONFIG_AS_GFNI -.section .rodata.cst8, "aM", @progbits, 8 -.align 8 /* AES affine: */ #define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) .Ltf_aff_bitmatrix: diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 5e37f41e5f14..27b5da2111ac 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -26,17 +26,7 @@ SYM_FUNC_START(\name) pushq %r11 call \func - jmp __thunk_restore -SYM_FUNC_END(\name) - _ASM_NOKPROBE(\name) - .endm - - THUNK preempt_schedule_thunk, preempt_schedule - THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace - EXPORT_SYMBOL(preempt_schedule_thunk) - EXPORT_SYMBOL(preempt_schedule_notrace_thunk) -SYM_CODE_START_LOCAL(__thunk_restore) popq %r11 popq %r10 popq %r9 @@ -48,5 +38,11 @@ SYM_CODE_START_LOCAL(__thunk_restore) popq %rdi popq %rbp RET - _ASM_NOKPROBE(__thunk_restore) -SYM_CODE_END(__thunk_restore) +SYM_FUNC_END(\name) + _ASM_NOKPROBE(\name) + .endm + +THUNK preempt_schedule_thunk, preempt_schedule +THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace +EXPORT_SYMBOL(preempt_schedule_thunk) +EXPORT_SYMBOL(preempt_schedule_notrace_thunk) diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c index 0a9007c24056..e4640306b2e3 100644 --- a/arch/x86/entry/vdso/vgetcpu.c +++ b/arch/x86/entry/vdso/vgetcpu.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <linux/getcpu.h> #include <asm/segment.h> +#include <vdso/processor.h> notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index bccea57dee81..abadd5f23425 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -374,7 +374,7 @@ static int amd_pmu_hw_config(struct perf_event *event) /* pass precise event sampling to ibs: */ if (event->attr.precise_ip && get_ibs_caps()) - return -ENOENT; + return forward_event_to_ibs(event); if (has_branch_stack(event) && !x86_pmu.lbr_nr) return -EOPNOTSUPP; diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 64582954b5f6..371014802191 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -190,7 +190,7 @@ static struct perf_ibs *get_ibs_pmu(int type) } /* - * Use IBS for precise event sampling: + * core pmu config -> IBS config * * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count * perf record -a -e r076:p ... # same as -e cpu-cycles:p @@ -199,25 +199,9 @@ static struct perf_ibs *get_ibs_pmu(int type) * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, * MSRC001_1033) is used to select either cycle or micro-ops counting * mode. - * - * The rip of IBS samples has skid 0. Thus, IBS supports precise - * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the - * rip is invalid when IBS was not able to record the rip correctly. - * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. - * */ -static int perf_ibs_precise_event(struct perf_event *event, u64 *config) +static int core_pmu_ibs_config(struct perf_event *event, u64 *config) { - switch (event->attr.precise_ip) { - case 0: - return -ENOENT; - case 1: - case 2: - break; - default: - return -EOPNOTSUPP; - } - switch (event->attr.type) { case PERF_TYPE_HARDWARE: switch (event->attr.config) { @@ -243,22 +227,37 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } +/* + * The rip of IBS samples has skid 0. Thus, IBS supports precise + * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the + * rip is invalid when IBS was not able to record the rip correctly. + * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. + */ +int forward_event_to_ibs(struct perf_event *event) +{ + u64 config = 0; + + if (!event->attr.precise_ip || event->attr.precise_ip > 2) + return -EOPNOTSUPP; + + if (!core_pmu_ibs_config(event, &config)) { + event->attr.type = perf_ibs_op.pmu.type; + event->attr.config = config; + } + return -ENOENT; +} + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs; u64 max_cnt, config; - int ret; perf_ibs = get_ibs_pmu(event->attr.type); - if (perf_ibs) { - config = event->attr.config; - } else { - perf_ibs = &perf_ibs_op; - ret = perf_ibs_precise_event(event, &config); - if (ret) - return ret; - } + if (!perf_ibs) + return -ENOENT; + + config = event->attr.config; if (event->pmu != &perf_ibs->pmu) return -ENOENT; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 070cc4ef2672..a149fafad813 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -349,6 +349,16 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); @@ -2451,7 +2461,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event) intel_clear_masks(event, idx); - mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4); + mask = intel_fixed_bits_by_idx(idx - INTEL_PMC_IDX_FIXED, INTEL_FIXED_BITS_MASK); cpuc->fixed_ctrl_val &= ~mask; } @@ -2750,25 +2760,25 @@ static void intel_pmu_enable_fixed(struct perf_event *event) * if requested: */ if (!event->attr.precise_ip) - bits |= 0x8; + bits |= INTEL_FIXED_0_ENABLE_PMI; if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) - bits |= 0x2; + bits |= INTEL_FIXED_0_USER; if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - bits |= 0x1; + bits |= INTEL_FIXED_0_KERNEL; /* * ANY bit is supported in v3 and up */ if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) - bits |= 0x4; + bits |= INTEL_FIXED_0_ANYTHREAD; idx -= INTEL_PMC_IDX_FIXED; - bits <<= (idx * 4); - mask = 0xfULL << (idx * 4); + bits = intel_fixed_bits_by_idx(idx, bits); + mask = intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK); if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) { - bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4); - mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE); + mask |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE); } cpuc->fixed_ctrl_val &= ~mask; @@ -4074,7 +4084,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) if (x86_pmu.intel_cap.pebs_baseline) { arr[(*nr)++] = (struct perf_guest_switch_msr){ .msr = MSR_PEBS_DATA_CFG, - .host = cpuc->pebs_data_cfg, + .host = cpuc->active_pebs_data_cfg, .guest = kvm_pmu->pebs_data_cfg, }; } @@ -6496,6 +6506,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + x86_pmu.extra_regs = intel_spr_extra_regs; fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: @@ -6506,7 +6517,8 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_spr_event_constraints; x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; - x86_pmu.extra_regs = intel_spr_extra_regs; + if (!x86_pmu.extra_regs) + x86_pmu.extra_regs = intel_gnr_extra_regs; x86_pmu.limit_period = spr_limit_period; x86_pmu.pebs_ept = 1; x86_pmu.pebs_aliases = NULL; @@ -6650,6 +6662,7 @@ __init int intel_pmu_init(void) pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { + x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index fa9b209a11fa..d49e90dc04a4 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6150,6 +6150,7 @@ static struct intel_uncore_type spr_uncore_mdf = { }; #define UNCORE_SPR_NUM_UNCORE_TYPES 12 +#define UNCORE_SPR_CHA 0 #define UNCORE_SPR_IIO 1 #define UNCORE_SPR_IMC 6 #define UNCORE_SPR_UPI 8 @@ -6460,12 +6461,22 @@ static int uncore_type_max_boxes(struct intel_uncore_type **types, return max + 1; } +#define SPR_MSR_UNC_CBO_CONFIG 0x2FFE + void spr_uncore_cpu_init(void) { + struct intel_uncore_type *type; + u64 num_cbo; + uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR, UNCORE_SPR_MSR_EXTRA_UNCORES, spr_msr_uncores); + type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA); + if (type) { + rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo); + type->num_boxes = num_cbo; + } spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); } diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a5f9474f08e1..6c04b52f139b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -416,7 +416,7 @@ void __init hyperv_init(void) goto free_vp_assist_page; } - cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", + cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online", hv_cpu_init, hv_cpu_die); if (cpuhp < 0) goto free_ghcb_page; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 1ba5d3b99b16..85d38b9f3586 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + x86_platform.realmode_reserve = x86_init_noop; + x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index cc92388b7a99..14f46ad2ca64 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -17,6 +17,7 @@ #include <asm/mem_encrypt.h> #include <asm/mshyperv.h> #include <asm/hypervisor.h> +#include <asm/mtrr.h> #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -364,7 +365,7 @@ void __init hv_vtom_init(void) * Set it here to indicate a vTOM VM. */ sev_status = MSR_AMD64_SNP_VTOM; - cc_set_vendor(CC_VENDOR_AMD); + cc_vendor = CC_VENDOR_AMD; cc_set_mask(ms_hyperv.shared_gpa_boundary); physical_mask &= ms_hyperv.shared_gpa_boundary - 1; @@ -372,6 +373,9 @@ void __init hv_vtom_init(void) x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; + + /* Set WB as the default cache mode. */ + mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 1e51650b79d7..4f1ce5fc4e19 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +generated-y += orc_hash.h generated-y += syscalls_32.h generated-y += syscalls_64.h generated-y += syscalls_x32.h diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index d7da28fada87..6c15a622ad60 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -113,7 +113,6 @@ extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); -extern bool is_callthunk(void *addr); extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); #else static __always_inline void callthunks_patch_builtin_calls(void) {} @@ -124,10 +123,6 @@ static __always_inline void *callthunks_translate_call_dest(void *dest) { return dest; } -static __always_inline bool is_callthunk(void *addr) -{ - return false; -} static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, void *func) { diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3216da7074ba..98c32aa5963a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -55,6 +55,8 @@ extern int local_apic_timer_c2_ok; extern int disable_apic; extern unsigned int lapic_timer_period; +extern int cpuid_to_apicid[]; + extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { APIC_PIC, @@ -377,7 +379,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; * APIC functionality to boot other CPUs - only used on SMP: */ #ifdef CONFIG_SMP -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); extern int lapic_can_unplug_cpu(void); #endif @@ -507,10 +508,8 @@ extern int default_check_phys_apicid_present(int phys_apicid); #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_SMP -bool apic_id_is_primary_thread(unsigned int id); void apic_smt_update(void); #else -static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } static inline void apic_smt_update(void) { } #endif diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 68d213e83fcc..4b125e5b3187 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_APICDEF_H #define _ASM_X86_APICDEF_H +#include <linux/bits.h> + /* * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) * @@ -138,9 +140,10 @@ #define APIC_EILVT_MASKED (1 << 16) #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -#define APIC_BASE_MSR 0x800 -#define XAPIC_ENABLE (1UL << 11) -#define X2APIC_ENABLE (1UL << 10) +#define APIC_BASE_MSR 0x800 +#define APIC_X2APIC_ID_MSR 0x802 +#define XAPIC_ENABLE BIT(11) +#define X2APIC_ENABLE BIT(10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 @@ -162,6 +165,7 @@ #define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) #define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) +#ifndef __ASSEMBLY__ /* * the local APIC register structure, memory mapped. Not terribly well * tested, but we might eventually use this one in the future - the @@ -435,4 +439,5 @@ enum apic_delivery_modes { APIC_DELIVERY_MODE_EXTINT = 7, }; +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 5e754e895767..55a55ec04350 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -14,12 +14,6 @@ * resource counting etc.. */ -/** - * arch_atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* @@ -29,25 +23,11 @@ static __always_inline int arch_atomic_read(const atomic_t *v) return __READ_ONCE((v)->counter); } -/** - * arch_atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ static __always_inline void arch_atomic_set(atomic_t *v, int i) { __WRITE_ONCE(v->counter, i); } -/** - * arch_atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" @@ -55,13 +35,6 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) : "ir" (i) : "memory"); } -/** - * arch_atomic_sub - subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" @@ -69,27 +42,12 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v) : "ir" (i) : "memory"); } -/** - * arch_atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test -/** - * arch_atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" @@ -97,12 +55,6 @@ static __always_inline void arch_atomic_inc(atomic_t *v) } #define arch_atomic_inc arch_atomic_inc -/** - * arch_atomic_dec - decrement atomic variable - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1. - */ static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" @@ -110,69 +62,30 @@ static __always_inline void arch_atomic_dec(atomic_t *v) } #define arch_atomic_dec arch_atomic_dec -/** - * arch_atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test -/** - * arch_atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test -/** - * arch_atomic_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative -/** - * arch_atomic_add_return - add integer and return - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns @i + @v - */ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic_add_return arch_atomic_add_return -/** - * arch_atomic_sub_return - subtract integer and return - * @v: pointer of type atomic_t - * @i: integer value to subtract - * - * Atomically subtracts @i from @v and returns @v - @i - */ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { return arch_atomic_add_return(-i, v); diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 808b4eece251..3486d91b8595 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -61,30 +61,12 @@ ATOMIC64_DECL(add_unless); #undef __ATOMIC64_DECL #undef ATOMIC64_EXPORT -/** - * arch_atomic64_cmpxchg - cmpxchg atomic64 variable - * @v: pointer to type atomic64_t - * @o: expected value - * @n: new value - * - * Atomically sets @v to @n if it was equal to @o and returns - * the old value. - */ - static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { return arch_cmpxchg64(&v->counter, o, n); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -/** - * arch_atomic64_xchg - xchg atomic64 variable - * @v: pointer to type atomic64_t - * @n: value to assign - * - * Atomically xchgs the value of @v to @n and returns - * the old value. - */ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { s64 o; @@ -97,13 +79,6 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) } #define arch_atomic64_xchg arch_atomic64_xchg -/** - * arch_atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: value to assign - * - * Atomically sets the value of @v to @n. - */ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); @@ -113,12 +88,6 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) : "eax", "edx", "memory"); } -/** - * arch_atomic64_read - read atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically reads the value of @v and returns it. - */ static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { s64 r; @@ -126,13 +95,6 @@ static __always_inline s64 arch_atomic64_read(const atomic64_t *v) return r; } -/** - * arch_atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + *@v - */ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, @@ -142,9 +104,6 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) } #define arch_atomic64_add_return arch_atomic64_add_return -/* - * Other variants with different arithmetic operators: - */ static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, @@ -172,13 +131,6 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) } #define arch_atomic64_dec_return arch_atomic64_dec_return -/** - * arch_atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. - */ static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, @@ -187,13 +139,6 @@ static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v) return i; } -/** - * arch_atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, @@ -202,12 +147,6 @@ static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) return i; } -/** - * arch_atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, @@ -215,12 +154,6 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) } #define arch_atomic64_inc arch_atomic64_inc -/** - * arch_atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ static __always_inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, @@ -228,15 +161,6 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) } #define arch_atomic64_dec arch_atomic64_dec -/** - * arch_atomic64_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns non-zero if the add was done, zero otherwise. - */ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned low = (unsigned)u; diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index c496595bf601..3165c0feedf7 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -10,37 +10,16 @@ #define ATOMIC64_INIT(i) { (i) } -/** - * arch_atomic64_read - read atomic64 variable - * @v: pointer of type atomic64_t - * - * Atomically reads the value of @v. - * Doesn't imply a read memory barrier. - */ static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __READ_ONCE((v)->counter); } -/** - * arch_atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __WRITE_ONCE(v->counter, i); } -/** - * arch_atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. - */ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" @@ -48,13 +27,6 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) : "er" (i), "m" (v->counter) : "memory"); } -/** - * arch_atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" @@ -62,27 +34,12 @@ static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) : "er" (i), "m" (v->counter) : "memory"); } -/** - * arch_atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test -/** - * arch_atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" @@ -91,12 +48,6 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) } #define arch_atomic64_inc arch_atomic64_inc -/** - * arch_atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" @@ -105,56 +56,24 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) } #define arch_atomic64_dec arch_atomic64_dec -/** - * arch_atomic64_dec_and_test - decrement and test - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test -/** - * arch_atomic64_inc_and_test - increment and test - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test -/** - * arch_atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } #define arch_atomic64_add_negative arch_atomic64_add_negative -/** - * arch_atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + @v - */ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 92ae28389940..f25ca2d709d4 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -4,8 +4,6 @@ #include <asm/processor.h> -extern void check_bugs(void); - #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 540573f515b7..d53636506134 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -239,29 +239,4 @@ extern void __add_wrong_size(void) #define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) -#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ -({ \ - bool __ret; \ - __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ - __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ - BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ - BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ - VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ - VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ - asm volatile(pfx "cmpxchg%c5b %1" \ - CC_SET(e) \ - : CC_OUT(e) (__ret), \ - "+m" (*(p1)), "+m" (*(p2)), \ - "+a" (__old1), "+d" (__old2) \ - : "i" (2 * sizeof(long)), \ - "b" (__new1), "c" (__new2)); \ - __ret; \ -}) - -#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) - -#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double(, p1, p2, o1, o2, n1, n2) - #endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 6ba80ce9438d..b5731c51f0f4 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -103,6 +103,6 @@ static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new) #endif -#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8) +#define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8) #endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 0d3beb27b7fe..44b08b53ab32 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -20,6 +20,71 @@ arch_try_cmpxchg((ptr), (po), (n)); \ }) -#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __arch_cmpxchg128(_ptr, _old, _new, _lock) \ +({ \ + union __u128_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm volatile(_lock "cmpxchg16b %[ptr]" \ + : [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + o.full; \ +}) + +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new) +{ + return __arch_cmpxchg128(ptr, old, new, LOCK_PREFIX); +} +#define arch_cmpxchg128 arch_cmpxchg128 + +static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, u128 new) +{ + return __arch_cmpxchg128(ptr, old, new,); +} +#define arch_cmpxchg128_local arch_cmpxchg128_local + +#define __arch_try_cmpxchg128(_ptr, _oldp, _new, _lock) \ +({ \ + union __u128_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm volatile(_lock "cmpxchg16b %[ptr]" \ + CC_SET(e) \ + : CC_OUT(e) (ret), \ + [ptr] "+m" (*ptr), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new) +{ + return __arch_try_cmpxchg128(ptr, oldp, new, LOCK_PREFIX); +} +#define arch_try_cmpxchg128 arch_try_cmpxchg128 + +static __always_inline bool arch_try_cmpxchg128_local(volatile u128 *ptr, u128 *oldp, u128 new) +{ + return __arch_try_cmpxchg128(ptr, oldp, new,); +} +#define arch_try_cmpxchg128_local arch_try_cmpxchg128_local + +#define system_has_cmpxchg128() boot_cpu_has(X86_FEATURE_CX16) #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index eb08796002f3..6ae2d16a7613 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -10,30 +10,13 @@ enum cc_vendor { CC_VENDOR_INTEL, }; -#ifdef CONFIG_ARCH_HAS_CC_PLATFORM extern enum cc_vendor cc_vendor; -static inline enum cc_vendor cc_get_vendor(void) -{ - return cc_vendor; -} - -static inline void cc_set_vendor(enum cc_vendor vendor) -{ - cc_vendor = vendor; -} - +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM void cc_set_mask(u64 mask); u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else -static inline enum cc_vendor cc_get_vendor(void) -{ - return CC_VENDOR_NONE; -} - -static inline void cc_set_vendor(enum cc_vendor vendor) { } - static inline u64 cc_mkenc(u64 val) { return val; diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 78796b98a544..3a233ebff712 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -30,10 +30,7 @@ struct x86_cpu { #ifdef CONFIG_HOTPLUG_CPU extern int arch_register_cpu(int num); extern void arch_unregister_cpu(int); -extern void start_cpu0(void); -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 -extern int _debug_hotplug_cpu(int cpu, int action); -#endif +extern void soft_restart_cpu(void); #endif extern void ap_init_aperfmperf(void); @@ -98,4 +95,6 @@ extern u64 x86_read_arch_cap_msr(void); int intel_find_matching_signature(void *mc, unsigned int csig, int cpf); int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type); +extern struct cpumask cpus_stop_mask; + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce0c8f7d3218..a26bebbdff87 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -38,15 +38,10 @@ enum cpuid_leafs #define X86_CAP_FMT_NUM "%d:%d" #define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31) -#ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; #define X86_CAP_FMT "%s" #define x86_cap_flag(flag) x86_cap_flags[flag] -#else -#define X86_CAP_FMT X86_CAP_FMT_NUM -#define x86_cap_flag x86_cap_flag_num -#endif /* * In order to save room, we index into this array by doing diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index c5aed9e9226c..4acfd57de8f1 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -4,11 +4,6 @@ #ifndef __ASSEMBLY__ #include <linux/cpumask.h> -extern cpumask_var_t cpu_callin_mask; -extern cpumask_var_t cpu_callout_mask; -extern cpumask_var_t cpu_initialized_mask; -extern cpumask_var_t cpu_sibling_setup_mask; - extern void setup_cpu_local_masks(void); /* diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h index 54a6e4a2e132..de0e88b32207 100644 --- a/arch/x86/include/asm/doublefault.h +++ b/arch/x86/include/asm/doublefault.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_DOUBLEFAULT_H #define _ASM_X86_DOUBLEFAULT_H +#include <linux/linkage.h> + #ifdef CONFIG_X86_32 extern void doublefault_init_cpu_tss(void); #else @@ -10,4 +12,6 @@ static inline void doublefault_init_cpu_tss(void) } #endif +asmlinkage void __noreturn doublefault_shim(void); + #endif /* _ASM_X86_DOUBLEFAULT_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 419280d263d2..8b4be7cecdb8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -31,6 +31,8 @@ extern unsigned long efi_mixed_mode_stack_pa; #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF +#define EFI_UNACCEPTED_UNIT_SIZE PMD_SIZE + /* * The EFI services are called through variadic functions in many cases. These * functions are implemented in assembler and support only a fixed number of diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 503a577814b2..b475d9a582b8 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -109,7 +109,7 @@ extern void fpu_reset_from_exception_fixup(void); /* Boot, hotplug and resume */ extern void fpu__init_cpu(void); -extern void fpu__init_system(struct cpuinfo_x86 *c); +extern void fpu__init_system(void); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index c2d6cd78ed0c..78fcde7b1f07 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -39,7 +39,7 @@ extern void fpu_flush_thread(void); static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (cpu_feature_enabled(X86_FEATURE_FPU) && - !(current->flags & (PF_KTHREAD | PF_IO_WORKER))) { + !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { save_fpregs_to_fpstate(old_fpu); /* * The save operation preserved register state, so the diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 5061ac98ffa1..b8d4a07f9595 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -106,6 +106,9 @@ struct dyn_arch_ftrace { #ifndef __ASSEMBLY__ +void prepare_ftrace_return(unsigned long ip, unsigned long *parent, + unsigned long frame_pointer); + #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) extern void set_ftrace_ops_ro(void); #else diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 9646ed6e8c0b..180b1cbfcc4e 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -350,4 +350,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } + +unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index b7126701574c..7f97a8a97e24 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -17,6 +17,12 @@ #include <asm/bootparam.h> +#ifdef CONFIG_X86_MEM_ENCRYPT +void __init mem_encrypt_init(void); +#else +static inline void mem_encrypt_init(void) { } +#endif + #ifdef CONFIG_AMD_MEM_ENCRYPT extern u64 sme_me_mask; @@ -87,9 +93,6 @@ static inline void mem_encrypt_free_decrypted_mem(void) { } #endif /* CONFIG_AMD_MEM_ENCRYPT */ -/* Architecture __weak replacement functions */ -void __init mem_encrypt_init(void); - void add_encrypt_protection_map(void); /* diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 49bb4f2bd300..88d9ef98e087 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -257,6 +257,11 @@ void hv_set_register(unsigned int reg, u64 value); u64 hv_get_non_nested_register(unsigned int reg); void hv_set_non_nested_register(unsigned int reg, u64 value); +static __always_inline u64 hv_raw_get_register(unsigned int reg) +{ + return __rdmsr(reg); +} + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index f0eeaf6e5f5f..090d658a85a6 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -23,14 +23,43 @@ #ifndef _ASM_X86_MTRR_H #define _ASM_X86_MTRR_H +#include <linux/bits.h> #include <uapi/asm/mtrr.h> +/* Defines for hardware MTRR registers. */ +#define MTRR_CAP_VCNT GENMASK(7, 0) +#define MTRR_CAP_FIX BIT_MASK(8) +#define MTRR_CAP_WC BIT_MASK(10) + +#define MTRR_DEF_TYPE_TYPE GENMASK(7, 0) +#define MTRR_DEF_TYPE_FE BIT_MASK(10) +#define MTRR_DEF_TYPE_E BIT_MASK(11) + +#define MTRR_DEF_TYPE_ENABLE (MTRR_DEF_TYPE_FE | MTRR_DEF_TYPE_E) +#define MTRR_DEF_TYPE_DISABLE ~(MTRR_DEF_TYPE_TYPE | MTRR_DEF_TYPE_ENABLE) + +#define MTRR_PHYSBASE_TYPE GENMASK(7, 0) +#define MTRR_PHYSBASE_RSVD GENMASK(11, 8) + +#define MTRR_PHYSMASK_RSVD GENMASK(10, 0) +#define MTRR_PHYSMASK_V BIT_MASK(11) + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + bool have_fixed; + mtrr_type def_type; +}; + /* * The following functions are for use by other drivers that cannot use * arch_phys_wc_add and arch_phys_wc_del. */ # ifdef CONFIG_MTRR void mtrr_bp_init(void); +void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var, + mtrr_type def_type); extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); @@ -40,7 +69,6 @@ extern int mtrr_add_page(unsigned long base, unsigned long size, unsigned int type, bool increment); extern int mtrr_del(int reg, unsigned long base, unsigned long size); extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); -extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); @@ -48,12 +76,21 @@ void mtrr_disable(void); void mtrr_enable(void); void mtrr_generic_set_state(void); # else +static inline void mtrr_overwrite_state(struct mtrr_var_range *var, + unsigned int num_var, + mtrr_type def_type) +{ +} + static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { /* - * Return no-MTRRs: + * Return the default MTRR type, without any known other types in + * that range. */ - return MTRR_TYPE_INVALID; + *uniform = 1; + + return MTRR_TYPE_UNCACHABLE; } #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) @@ -79,9 +116,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) { return 0; } -static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ -} #define mtrr_bp_init() do {} while (0) #define mtrr_bp_restore() do {} while (0) #define mtrr_disable() do {} while (0) @@ -121,7 +155,8 @@ struct mtrr_gentry32 { #endif /* CONFIG_COMPAT */ /* Bit fields for enabled in struct mtrr_state_type */ -#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01 -#define MTRR_STATE_MTRR_ENABLED 0x02 +#define MTRR_STATE_SHIFT 10 +#define MTRR_STATE_MTRR_FIXED_ENABLED (MTRR_DEF_TYPE_FE >> MTRR_STATE_SHIFT) +#define MTRR_STATE_MTRR_ENABLED (MTRR_DEF_TYPE_E >> MTRR_STATE_SHIFT) #endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index c5573eaa5bb9..1c1b7550fa55 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -34,6 +34,8 @@ #define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 #define BYTES_NOP8 0x3e,BYTES_NOP7 +#define ASM_NOP_MAX 8 + #else /* @@ -47,6 +49,9 @@ * 6: osp nopl 0x00(%eax,%eax,1) * 7: nopl 0x00000000(%eax) * 8: nopl 0x00000000(%eax,%eax,1) + * 9: cs nopl 0x00000000(%eax,%eax,1) + * 10: osp cs nopl 0x00000000(%eax,%eax,1) + * 11: osp osp cs nopl 0x00000000(%eax,%eax,1) */ #define BYTES_NOP1 0x90 #define BYTES_NOP2 0x66,BYTES_NOP1 @@ -56,6 +61,15 @@ #define BYTES_NOP6 0x66,BYTES_NOP5 #define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 #define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +#define BYTES_NOP9 0x2e,BYTES_NOP8 +#define BYTES_NOP10 0x66,BYTES_NOP9 +#define BYTES_NOP11 0x66,BYTES_NOP10 + +#define ASM_NOP9 _ASM_BYTES(BYTES_NOP9) +#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10) +#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11) + +#define ASM_NOP_MAX 11 #endif /* CONFIG_64BIT */ @@ -68,8 +82,6 @@ #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) -#define ASM_NOP_MAX 8 - #ifndef __ASSEMBLY__ extern const unsigned char * const x86_nops[]; #endif diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index edb2b0cb8efe..55388c9f7601 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -84,12 +84,12 @@ movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); #define RESET_CALL_DEPTH \ - mov $0x80, %rax; \ - shl $56, %rax; \ + xor %eax, %eax; \ + bts $63, %rax; \ movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); #define RESET_CALL_DEPTH_FROM_CALL \ - mov $0xfc, %rax; \ + movb $0xfc, %al; \ shl $56, %rax; \ movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h new file mode 100644 index 000000000000..07bacf3e160e --- /dev/null +++ b/arch/x86/include/asm/orc_header.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) Meta Platforms, Inc. and affiliates. */ + +#ifndef _ORC_HEADER_H +#define _ORC_HEADER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/orc_hash.h> + +/* + * The header is currently a 20-byte hash of the ORC entry definition; see + * scripts/orc_hash.sh. + */ +#define ORC_HEADER \ + __used __section(".orc_header") __aligned(4) \ + static const u8 orc_header[] = { ORC_HASH } + +#endif /* _ORC_HEADER_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 13c0d63ed55e..34734d730463 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -210,6 +210,67 @@ do { \ (typeof(_var))(unsigned long) pco_old__; \ }) +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) +#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \ +({ \ + union { \ + u64 var; \ + struct { \ + u32 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = _oval; \ + new__.var = _nval; \ + \ + asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ + : [var] "+m" (_var), \ + "+a" (old__.low), \ + "+d" (old__.high) \ + : "b" (new__.low), \ + "c" (new__.high) \ + : "memory", "esi"); \ + \ + old__.var; \ +}) + +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval) +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval) +#endif + +#ifdef CONFIG_X86_64 +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval); +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval); + +#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \ +({ \ + union { \ + u128 var; \ + struct { \ + u64 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = _oval; \ + new__.var = _nval; \ + \ + asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ + : [var] "+m" (_var), \ + "+a" (old__.low), \ + "+d" (old__.high) \ + : "b" (new__.low), \ + "c" (new__.high) \ + : "memory", "rsi"); \ + \ + old__.var; \ +}) + +#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval) +#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval) +#endif + /* * this_cpu_read() makes gcc load the percpu variable every time it is * accessed while this_cpu_read_stable() allows the value to be cached. @@ -290,23 +351,6 @@ do { \ #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval) #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval) -#ifdef CONFIG_X86_CMPXCHG64 -#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ -({ \ - bool __ret; \ - typeof(pcp1) __o1 = (o1), __n1 = (n1); \ - typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - asm volatile("cmpxchg8b "__percpu_arg(1) \ - CC_SET(z) \ - : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \ - : "b" (__n1), "c" (__n2)); \ - __ret; \ -}) - -#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double -#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double -#endif /* CONFIG_X86_CMPXCHG64 */ - /* * Per cpu atomic 64 bit operations are only available under 64 bit. * 32 bit must fall back to generic operations. @@ -329,30 +373,6 @@ do { \ #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val) #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval) - -/* - * Pretty complex macro to generate cmpxchg16 instruction. The instruction - * is not supported on early AMD64 processors so we must be able to emulate - * it in software. The address used in the cmpxchg16 instruction must be - * aligned to a 16 byte boundary. - */ -#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ -({ \ - bool __ret; \ - typeof(pcp1) __o1 = (o1), __n1 = (n1); \ - typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ - "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ - X86_FEATURE_CX16, \ - ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ - "+m" (pcp2), "+d" (__o2)), \ - "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ - __ret; \ -}) - -#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double -#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double - #endif static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index abf09882f58b..85a9fd5a3ec3 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -32,11 +32,21 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define INTEL_FIXED_BITS_MASK 0xFULL +#define INTEL_FIXED_BITS_STRIDE 4 +#define INTEL_FIXED_0_KERNEL (1ULL << 0) +#define INTEL_FIXED_0_USER (1ULL << 1) +#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2) +#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3) + #define HSW_IN_TX (1ULL << 32) #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) #define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) #define ICL_FIXED_0_ADAPTIVE (1ULL << 32) +#define intel_fixed_bits_by_idx(_idx, _bits) \ + ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE)) + #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) @@ -478,8 +488,10 @@ struct pebs_xmm { #ifdef CONFIG_X86_LOCAL_APIC extern u32 get_ibs_caps(void); +extern int forward_event_to_ibs(struct perf_event *event); #else static inline u32 get_ibs_caps(void) { return 0; } +static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; } #endif #ifdef CONFIG_PERF_EVENTS diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 15ae4d6ba476..5700bb337987 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -27,6 +27,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); +struct seq_file; void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 7929327abe00..a629b1b9f65a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -237,8 +237,8 @@ static inline void native_pgd_clear(pgd_t *pgd) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) -#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) +#define __swp_entry_to_pte(x) (__pte((x).val)) +#define __swp_entry_to_pmd(x) (__pmd((x).val)) extern void cleanup_highmap(void); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 447d4bee25c4..ba3e2554799a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -513,9 +513,6 @@ extern void native_pagetable_init(void); #define native_pagetable_init paging_init #endif -struct seq_file; -extern void arch_report_meminfo(struct seq_file *m); - enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a1e4fa58b357..d46300e94f85 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -551,7 +551,6 @@ extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void cpu_init(void); -extern void cpu_init_secondary(void); extern void cpu_init_exception_handling(void); extern void cr4_init(void); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index f6a1737c77be..87e5482acd0d 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -52,6 +52,7 @@ struct trampoline_header { u64 efer; u32 cr4; u32 flags; + u32 lock; #endif }; @@ -64,6 +65,8 @@ extern unsigned long initial_stack; extern unsigned long initial_vc_handler; #endif +extern u32 *trampoline_lock; + extern unsigned char real_mode_blob[]; extern unsigned char real_mode_relocs[]; diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 0759af9b1acf..b463fcbd4b90 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -106,8 +106,13 @@ enum psc_op { #define GHCB_HV_FT_SNP BIT_ULL(0) #define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) -/* SNP Page State Change NAE event */ -#define VMGEXIT_PSC_MAX_ENTRY 253 +/* + * SNP Page State Change NAE event + * The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which + * is a local stack variable in set_pages_state(). Do not increase this value + * without evaluating the impact to stack usage. + */ +#define VMGEXIT_PSC_MAX_ENTRY 64 struct psc_hdr { u16 cur_entry; diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 13dc2a9d23c1..66c806784c52 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -14,6 +14,7 @@ #include <asm/insn.h> #include <asm/sev-common.h> #include <asm/bootparam.h> +#include <asm/coco.h> #define GHCB_PROTOCOL_MIN 1ULL #define GHCB_PROTOCOL_MAX 2ULL @@ -80,11 +81,15 @@ extern void vc_no_ghcb(void); extern void vc_boot_ghcb(void); extern bool handle_vc_boot_ghcb(struct pt_regs *regs); +/* PVALIDATE return codes */ +#define PVALIDATE_FAIL_SIZEMISMATCH 6 + /* Software defined (when rFlags.CF = 1) */ #define PVALIDATE_FAIL_NOUPDATE 255 /* RMP page size */ #define RMP_PG_SIZE_4K 0 +#define RMP_PG_SIZE_2M 1 #define RMPADJUST_VMSA_PAGE_BIT BIT(16) @@ -136,24 +141,26 @@ struct snp_secrets_page_layout { } __packed; #ifdef CONFIG_AMD_MEM_ENCRYPT -extern struct static_key_false sev_es_enable_key; extern void __sev_es_ist_enter(struct pt_regs *regs); extern void __sev_es_ist_exit(void); static __always_inline void sev_es_ist_enter(struct pt_regs *regs) { - if (static_branch_unlikely(&sev_es_enable_key)) + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) __sev_es_ist_enter(regs); } static __always_inline void sev_es_ist_exit(void) { - if (static_branch_unlikely(&sev_es_enable_key)) + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) __sev_es_ist_exit(); } extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh); extern void __sev_es_nmi_complete(void); static __always_inline void sev_es_nmi_complete(void) { - if (static_branch_unlikely(&sev_es_enable_key)) + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); @@ -192,16 +199,17 @@ struct snp_guest_request_ioctl; void setup_ghcb(void); void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned int npages); + unsigned long npages); void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned int npages); + unsigned long npages); void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); -void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); -void snp_set_memory_private(unsigned long vaddr, unsigned int npages); +void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); +void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +void snp_accept_memory(phys_addr_t start, phys_addr_t end); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -212,12 +220,12 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } static inline void __init -early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { } +early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init -early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { } +early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } -static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { } -static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { } +static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } +static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } @@ -225,6 +233,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } + +static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } #endif #endif diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 2631e01f6e0f..7513b3bb69b7 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -10,6 +10,20 @@ #define TDX_CPUID_LEAF_ID 0x21 #define TDX_IDENT "IntelTDX " +/* TDX module Call Leaf IDs */ +#define TDX_GET_INFO 1 +#define TDX_GET_VEINFO 3 +#define TDX_GET_REPORT 4 +#define TDX_ACCEPT_PAGE 6 +#define TDX_WR 8 + +/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */ +#define TDCS_NOTIFY_ENABLES 0x9100000000000010 + +/* TDX hypercall Leaf IDs */ +#define TDVMCALL_MAP_GPA 0x10001 +#define TDVMCALL_REPORT_FATAL_ERROR 0x10003 + #ifndef __ASSEMBLY__ /* @@ -37,8 +51,58 @@ struct tdx_hypercall_args { u64 __tdx_hypercall(struct tdx_hypercall_args *args); u64 __tdx_hypercall_ret(struct tdx_hypercall_args *args); +/* + * Wrapper for standard use of __tdx_hypercall with no output aside from + * return code. + */ +static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = fn, + .r12 = r12, + .r13 = r13, + .r14 = r14, + .r15 = r15, + }; + + return __tdx_hypercall(&args); +} + + /* Called from __tdx_hypercall() for unrecoverable failure */ void __tdx_hypercall_failed(void); +/* + * Used in __tdx_module_call() to gather the output registers' values of the + * TDCALL instruction when requesting services from the TDX module. This is a + * software only structure and not part of the TDX module/VMM ABI + */ +struct tdx_module_output { + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + u64 r10; + u64 r11; +}; + +/* Used to communicate with the TDX module */ +u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, + struct tdx_module_output *out); + +bool tdx_accept_memory(phys_addr_t start, phys_addr_t end); + +/* + * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined + * independently from but are currently matched 1:1 with VMX EXIT_REASONs. + * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and + * guest sides of these calls. + */ +static __always_inline u64 hcall_func(u64 exit_reason) +{ + return exit_reason; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_SHARED_TDX_H */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 5b1ed650b124..84eab2724875 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -85,6 +85,4 @@ struct rt_sigframe_x32 { #endif /* CONFIG_X86_64 */ -void __init init_sigframe_size(void); - #endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4e91054c84be..600cf25dbfc6 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -38,7 +38,9 @@ struct smp_ops { void (*crash_stop_other_cpus)(void); void (*smp_send_reschedule)(int cpu); - int (*cpu_up)(unsigned cpu, struct task_struct *tidle); + void (*cleanup_dead_cpu)(unsigned cpu); + void (*poll_sync_state)(void); + int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle); int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); @@ -78,11 +80,6 @@ static inline void smp_cpus_done(unsigned int max_cpus) smp_ops.smp_cpus_done(max_cpus); } -static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle) -{ - return smp_ops.cpu_up(cpu, tidle); -} - static inline int __cpu_disable(void) { return smp_ops.cpu_disable(); @@ -90,7 +87,8 @@ static inline int __cpu_disable(void) static inline void __cpu_die(unsigned int cpu) { - smp_ops.cpu_die(cpu); + if (smp_ops.cpu_die) + smp_ops.cpu_die(cpu); } static inline void __noreturn play_dead(void) @@ -121,22 +119,23 @@ void native_smp_prepare_cpus(unsigned int max_cpus); void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); -int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int native_kick_ap(unsigned int cpu, struct task_struct *tidle); int native_cpu_disable(void); -int common_cpu_die(unsigned int cpu); -void native_cpu_die(unsigned int cpu); void __noreturn hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); -void cond_wakeup_cpu0(void); + +void smp_kick_mwait_play_dead(void); void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); +bool smp_park_other_cpus_in_init(void); + void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); @@ -201,7 +200,14 @@ extern void nmi_selftest(void); #endif extern unsigned int smpboot_control; +extern unsigned long apic_mmio_base; #endif /* !__ASSEMBLY__ */ +/* Control bits for startup_64 */ +#define STARTUP_READ_APICID 0x80000000 + +/* Top 8 bits are reserved for control */ +#define STARTUP_PARALLEL_MASK 0xFF000000 + #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 5b85987a5e97..4fb36fba4b5a 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -127,9 +127,11 @@ static inline int syscall_get_arch(struct task_struct *task) } void do_syscall_64(struct pt_regs *regs, int nr); -void do_int80_syscall_32(struct pt_regs *regs); -long do_fast_syscall_32(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ +void do_int80_syscall_32(struct pt_regs *regs); +long do_fast_syscall_32(struct pt_regs *regs); +long do_SYSENTER_32(struct pt_regs *regs); + #endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 28d889c9aa16..603e6d1e9d4a 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -5,6 +5,8 @@ #include <linux/init.h> #include <linux/bits.h> + +#include <asm/errno.h> #include <asm/ptrace.h> #include <asm/shared/tdx.h> @@ -21,21 +23,6 @@ #ifndef __ASSEMBLY__ /* - * Used to gather the output registers values of the TDCALL and SEAMCALL - * instructions when requesting services from the TDX module. - * - * This is a software only structure and not part of the TDX module/VMM ABI. - */ -struct tdx_module_output { - u64 rcx; - u64 rdx; - u64 r8; - u64 r9; - u64 r10; - u64 r11; -}; - -/* * Used by the #VE exception handler to gather the #VE exception * info from the TDX module. This is a software only structure * and not part of the TDX module/VMM ABI. @@ -55,10 +42,6 @@ struct ve_info { void __init tdx_early_init(void); -/* Used to communicate with the TDX module */ -u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, - struct tdx_module_output *out); - void tdx_get_ve_info(struct ve_info *ve); bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 75bfaa421030..80450e1d5385 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -14,6 +14,8 @@ #include <asm/processor-flags.h> #include <asm/pgtable.h> +DECLARE_PER_CPU(u64, tlbstate_untag_mask); + void __flush_tlb_all(void); #define TLB_FLUSH_ALL -1UL @@ -54,15 +56,6 @@ static inline void cr4_clear_bits(unsigned long mask) local_irq_restore(flags); } -#ifdef CONFIG_ADDRESS_MASKING -DECLARE_PER_CPU(u64, tlbstate_untag_mask); - -static inline u64 current_untag_mask(void) -{ - return this_cpu_read(tlbstate_untag_mask); -} -#endif - #ifndef MODULE /* * 6 because 6 should be plenty and struct tlb_state will fit in two cache diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 458c891a8273..caf41c4869a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -31,9 +31,9 @@ * CONFIG_NUMA. */ #include <linux/numa.h> +#include <linux/cpumask.h> #ifdef CONFIG_NUMA -#include <linux/cpumask.h> #include <asm/mpspec.h> #include <asm/percpu.h> @@ -139,23 +139,31 @@ static inline int topology_max_smt_threads(void) int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); -int topology_phys_to_logical_die(unsigned int die, unsigned int cpu); -bool topology_is_primary_thread(unsigned int cpu); bool topology_smt_supported(void); -#else + +extern struct cpumask __cpu_primary_thread_mask; +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) + +/** + * topology_is_primary_thread - Check whether CPU is the primary SMT thread + * @cpu: CPU to check + */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_primary_thread_mask); +} +#else /* CONFIG_SMP */ #define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } -static inline int topology_phys_to_logical_die(unsigned int die, - unsigned int cpu) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline bool topology_smt_supported(void) { return false; } -#endif +#endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) { diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index fbdc3d951494..dc1b03be43eb 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -55,12 +55,10 @@ extern bool tsc_async_resets; #ifdef CONFIG_X86_TSC extern bool tsc_store_and_check_tsc_adjust(bool bootcpu); extern void tsc_verify_tsc_adjust(bool resume); -extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); #else static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; } static inline void tsc_verify_tsc_adjust(bool resume) { } -static inline void check_tsc_sync_source(int cpu) { } static inline void check_tsc_sync_target(void) { } #endif diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h new file mode 100644 index 000000000000..f5937e9866ac --- /dev/null +++ b/arch/x86/include/asm/unaccepted_memory.h @@ -0,0 +1,27 @@ +#ifndef _ASM_X86_UNACCEPTED_MEMORY_H +#define _ASM_X86_UNACCEPTED_MEMORY_H + +#include <linux/efi.h> +#include <asm/tdx.h> +#include <asm/sev.h> + +static inline void arch_accept_memory(phys_addr_t start, phys_addr_t end) +{ + /* Platform-specific memory-acceptance call goes here */ + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { + if (!tdx_accept_memory(start, end)) + panic("TDX: Failed to accept memory\n"); + } else if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { + snp_accept_memory(start, end); + } else { + panic("Cannot accept memory: unknown platform\n"); + } +} + +static inline struct efi_unaccepted_memory *efi_get_unaccepted_table(void) +{ + if (efi.unaccepted == EFI_INVALID_TABLE_ADDR) + return NULL; + return __va(efi.unaccepted); +} +#endif diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 01cb9692b160..85cc57cb6539 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -76,9 +76,18 @@ #else +#define UNWIND_HINT_UNDEFINED \ + UNWIND_HINT(UNWIND_HINT_TYPE_UNDEFINED, 0, 0, 0) + #define UNWIND_HINT_FUNC \ UNWIND_HINT(UNWIND_HINT_TYPE_FUNC, ORC_REG_SP, 8, 0) +#define UNWIND_HINT_SAVE \ + UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) + +#define UNWIND_HINT_RESTORE \ + UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index d3e3197917be..5fa76c2ced51 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -177,6 +177,7 @@ struct uv_hub_info_s { unsigned short nr_possible_cpus; unsigned short nr_online_cpus; short memory_nid; + unsigned short *node_to_socket; }; /* CPU specific info with a pointer to the hub common info struct */ @@ -519,25 +520,30 @@ static inline int uv_socket_to_node(int socket) return _uv_socket_to_node(socket, uv_hub_info->socket_to_node); } +static inline int uv_pnode_to_socket(int pnode) +{ + unsigned short *p2s = uv_hub_info->pnode_to_socket; + + return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode; +} + /* pnode, offset --> socket virtual */ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) { unsigned int m_val = uv_hub_info->m_val; unsigned long base; - unsigned short sockid, node, *p2s; + unsigned short sockid; if (m_val) return __va(((unsigned long)pnode << m_val) | offset); - p2s = uv_hub_info->pnode_to_socket; - sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode; - node = uv_socket_to_node(sockid); + sockid = uv_pnode_to_socket(pnode); /* limit address of previous socket is our base, except node 0 is 0 */ - if (!node) + if (sockid == 0) return __va((unsigned long)offset); - base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit); + base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit); return __va(base << UV_GAM_RANGE_SHFT | offset); } @@ -644,7 +650,7 @@ static inline int uv_cpu_blade_processor_id(int cpu) /* Blade number to Node number (UV2..UV4 is 1:1) */ static inline int uv_blade_to_node(int blade) { - return blade; + return uv_socket_to_node(blade); } /* Blade number of current cpu. Numnbered 0 .. <#blades -1> */ @@ -656,23 +662,27 @@ static inline int uv_numa_blade_id(void) /* * Convert linux node number to the UV blade number. * .. Currently for UV2 thru UV4 the node and the blade are identical. - * .. If this changes then you MUST check references to this function! + * .. UV5 needs conversion when sub-numa clustering is enabled. */ static inline int uv_node_to_blade_id(int nid) { - return nid; + unsigned short *n2s = uv_hub_info->node_to_socket; + + return n2s ? n2s[nid] : nid; } /* Convert a CPU number to the UV blade number */ static inline int uv_cpu_to_blade_id(int cpu) { - return uv_node_to_blade_id(cpu_to_node(cpu)); + return uv_cpu_hub_info(cpu)->numa_blade_id; } /* Convert a blade id to the PNODE of the blade */ static inline int uv_blade_to_pnode(int bid) { - return uv_hub_info_list(uv_blade_to_node(bid))->pnode; + unsigned short *s2p = uv_hub_info->socket_to_pnode; + + return s2p ? s2p[bid] : bid; } /* Nid of memory node on blade. -1 if no blade-local memory */ diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 57fa67373262..bb45812889dd 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -4199,6 +4199,13 @@ union uvh_rh_gam_mmioh_overlay_config1_u { #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + 0) + union uvh_rh_gam_mmioh_redirect_config0_u { unsigned long v; @@ -4247,8 +4254,8 @@ union uvh_rh_gam_mmioh_redirect_config0_u { 0) /* UV4A unique defines */ -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000000fffUL /* UV4 unique defines */ #define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 @@ -4258,6 +4265,13 @@ union uvh_rh_gam_mmioh_redirect_config0_u { #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + 0) + union uvh_rh_gam_mmioh_redirect_config1_u { unsigned long v; diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 4cf6794f9d68..c81858d903dc 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -231,14 +231,19 @@ static u64 vread_pvclock(void) ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); } while (pvclock_read_retry(pvti, version)); - return ret; + return ret & S64_MAX; } #endif #ifdef CONFIG_HYPERV_TIMER static u64 vread_hvclock(void) { - return hv_read_tsc_page(&hvclock_page); + u64 tsc, time; + + if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time)) + return time & S64_MAX; + + return U64_MAX; } #endif @@ -246,7 +251,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) { if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) - return (u64)rdtsc_ordered(); + return (u64)rdtsc_ordered() & S64_MAX; /* * For any memory-mapped vclock type, we need to make sure that gcc * doesn't cleverly hoist a load before the mode check. Otherwise we @@ -284,6 +289,9 @@ static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) * which can be invalidated asynchronously and indicate invalidation by * returning U64_MAX, which can be effectively tested by checking for a * negative value after casting it to s64. + * + * This effectively forces a S64_MAX mask on the calculations, unlike the + * U64_MAX mask normally used by x86 clocksources. */ static inline bool arch_vdso_cycles_ok(u64 cycles) { @@ -303,18 +311,29 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * @last. If not then use @last, which is the base time of the current * conversion period. * - * This variant also removes the masking of the subtraction because the - * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX - * which would result in a pointless operation. The compiler cannot - * optimize it away as the mask comes from the vdso data and is not compile - * time constant. + * This variant also uses a custom mask because while the clocksource mask of + * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses + * U64_MASK as an exception value, additionally arch_vdso_cycles_ok() above + * declares everything with the MSB/Sign-bit set as invalid. Therefore the + * effective mask is S64_MAX. */ static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) { - if (cycles > last) - return (cycles - last) * mult; - return 0; + /* + * Due to the MSB/Sign-bit being used as invald marker (see + * arch_vdso_cycles_valid() above), the effective mask is S64_MAX. + */ + u64 delta = (cycles - last) & S64_MAX; + + /* + * Due to the above mentioned TSC wobbles, filter out negative motion. + * Per the above masking, the effective sign bit is now bit 62. + */ + if (unlikely(delta & (1ULL << 62))) + return 0; + + return delta * mult; } #define vdso_calc_delta vdso_calc_delta diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 498dc600bd5c..0d02c4aafa6f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -13,7 +13,9 @@ #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/types.h> + #include <uapi/asm/vmx.h> #include <asm/vmxfeatures.h> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 88085f369ff6..5240d88db52a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -150,7 +150,7 @@ struct x86_init_acpi { * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status */ struct x86_guest { - void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); bool (*enc_tlb_flush_required)(bool enc); bool (*enc_cache_flush_required)(void); @@ -177,11 +177,14 @@ struct x86_init_ops { * struct x86_cpuinit_ops - platform specific cpu hotplug setups * @setup_percpu_clockev: set up the per cpu clock event device * @early_percpu_clock_init: early init of the per cpu clock event device + * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id + * @parallel_bringup: Parallel bringup control */ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); void (*early_percpu_clock_init)(void); void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); + bool parallel_bringup; }; struct timespec64; diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h index 376563f2bac1..3a8a8eb8ac3a 100644 --- a/arch/x86/include/uapi/asm/mtrr.h +++ b/arch/x86/include/uapi/asm/mtrr.h @@ -81,14 +81,6 @@ typedef __u8 mtrr_type; #define MTRR_NUM_FIXED_RANGES 88 #define MTRR_MAX_VAR_RANGES 256 -struct mtrr_state_type { - struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; - mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) @@ -115,9 +107,9 @@ struct mtrr_state_type { #define MTRR_NUM_TYPES 7 /* - * Invalid MTRR memory type. mtrr_type_lookup() returns this value when - * MTRRs are disabled. Note, this value is allocated from the reserved - * values (0x7-0xff) of the MTRR memory types. + * Invalid MTRR memory type. No longer used outside of MTRR code. + * Note, this value is allocated from the reserved values (0x7-0xff) of + * the MTRR memory types. */ #define MTRR_TYPE_INVALID 0xff diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index dd61752f4c96..4070a01c11b7 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -17,6 +17,7 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg CFLAGS_REMOVE_sev.o = -pg +CFLAGS_REMOVE_rethook.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 1328c221af30..6dfecb27b846 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include <asm/cacheflush.h> #include <asm/realmode.h> #include <asm/hypervisor.h> +#include <asm/smp.h> #include <linux/ftrace.h> #include "../../realmode/rm/wakeup.h" @@ -127,7 +128,13 @@ int x86_acpi_suspend_lowlevel(void) * value is in the actual %rsp register. */ current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack); - smpboot_control = smp_processor_id(); + /* + * Ensure the CPU knows which one it is when it comes back, if + * it isn't in parallel mode and expected to work that out for + * itself. + */ + if (!(smpboot_control & STARTUP_PARALLEL_MASK)) + smpboot_control = smp_processor_id(); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0L; diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 171a40c74db6..054c15a2f860 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -12,7 +12,6 @@ extern int wakeup_pmode_return; extern u8 wake_sleep_flags; -extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); extern void do_suspend_lowlevel(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f615e0cb6d93..72646d75b6ff 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -37,11 +37,23 @@ EXPORT_SYMBOL_GPL(alternatives_patched); #define MAX_PATCH_LEN (255-1) -static int __initdata_or_module debug_alternative; +#define DA_ALL (~0) +#define DA_ALT 0x01 +#define DA_RET 0x02 +#define DA_RETPOLINE 0x04 +#define DA_ENDBR 0x08 +#define DA_SMP 0x10 + +static unsigned int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) { - debug_alternative = 1; + if (str && *str == '=') + str++; + + if (!str || kstrtouint(str, 0, &debug_alternative)) + debug_alternative = DA_ALL; + return 1; } __setup("debug-alternative", debug_alt); @@ -55,15 +67,15 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#define DPRINTK(fmt, args...) \ +#define DPRINTK(type, fmt, args...) \ do { \ - if (debug_alternative) \ + if (debug_alternative & DA_##type) \ printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args); \ } while (0) -#define DUMP_BYTES(buf, len, fmt, args...) \ +#define DUMP_BYTES(type, buf, len, fmt, args...) \ do { \ - if (unlikely(debug_alternative)) { \ + if (unlikely(debug_alternative & DA_##type)) { \ int j; \ \ if (!(len)) \ @@ -86,6 +98,11 @@ static const unsigned char x86nops[] = BYTES_NOP6, BYTES_NOP7, BYTES_NOP8, +#ifdef CONFIG_64BIT + BYTES_NOP9, + BYTES_NOP10, + BYTES_NOP11, +#endif }; const unsigned char * const x86_nops[ASM_NOP_MAX+1] = @@ -99,19 +116,44 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = x86nops + 1 + 2 + 3 + 4 + 5, x86nops + 1 + 2 + 3 + 4 + 5 + 6, x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +#ifdef CONFIG_64BIT + x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8, + x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9, + x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10, +#endif }; -/* Use this to add nops to a buffer, then text_poke the whole buffer. */ -static void __init_or_module add_nops(void *insns, unsigned int len) +/* + * Fill the buffer with a single effective instruction of size @len. + * + * In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info) + * for every single-byte NOP, try to generate the maximally available NOP of + * size <= ASM_NOP_MAX such that only a single CFI entry is generated (vs one for + * each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and + * *jump* over instead of executing long and daft NOPs. + */ +static void __init_or_module add_nop(u8 *instr, unsigned int len) { - while (len > 0) { - unsigned int noplen = len; - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - memcpy(insns, x86_nops[noplen], noplen); - insns += noplen; - len -= noplen; + u8 *target = instr + len; + + if (!len) + return; + + if (len <= ASM_NOP_MAX) { + memcpy(instr, x86_nops[len], len); + return; } + + if (len < 128) { + __text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE); + instr += JMP8_INSN_SIZE; + } else { + __text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE); + instr += JMP32_INSN_SIZE; + } + + for (;instr < target; instr++) + *instr = INT3_INSN_OPCODE; } extern s32 __retpoline_sites[], __retpoline_sites_end[]; @@ -123,133 +165,223 @@ extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); /* - * Are we looking at a near JMP with a 1 or 4-byte displacement. + * Matches NOP and NOPL, not any of the other possible NOPs. */ -static inline bool is_jmp(const u8 opcode) +static bool insn_is_nop(struct insn *insn) { - return opcode == 0xeb || opcode == 0xe9; + /* Anything NOP, but no REP NOP */ + if (insn->opcode.bytes[0] == 0x90 && + (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3)) + return true; + + /* NOPL */ + if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F) + return true; + + /* TODO: more nops */ + + return false; } -static void __init_or_module -recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff) +/* + * Find the offset of the first non-NOP instruction starting at @offset + * but no further than @len. + */ +static int skip_nops(u8 *instr, int offset, int len) { - u8 *next_rip, *tgt_rip; - s32 n_dspl, o_dspl; - int repl_len; + struct insn insn; - if (a->replacementlen != 5) - return; + for (; offset < len; offset += insn.length) { + if (insn_decode_kernel(&insn, &instr[offset])) + break; - o_dspl = *(s32 *)(insn_buff + 1); + if (!insn_is_nop(&insn)) + break; + } - /* next_rip of the replacement JMP */ - next_rip = repl_insn + a->replacementlen; - /* target rip of the replacement JMP */ - tgt_rip = next_rip + o_dspl; - n_dspl = tgt_rip - orig_insn; + return offset; +} - DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl); +/* + * Optimize a sequence of NOPs, possibly preceded by an unconditional jump + * to the end of the NOP sequence into a single NOP. + */ +static bool __init_or_module +__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target) +{ + int i = *next - insn->length; - if (tgt_rip - orig_insn >= 0) { - if (n_dspl - 2 <= 127) - goto two_byte_jmp; - else - goto five_byte_jmp; - /* negative offset */ - } else { - if (((n_dspl - 2) & 0xff) == (n_dspl - 2)) - goto two_byte_jmp; - else - goto five_byte_jmp; + switch (insn->opcode.bytes[0]) { + case JMP8_INSN_OPCODE: + case JMP32_INSN_OPCODE: + *prev = i; + *target = *next + insn->immediate.value; + return false; } -two_byte_jmp: - n_dspl -= 2; + if (insn_is_nop(insn)) { + int nop = i; - insn_buff[0] = 0xeb; - insn_buff[1] = (s8)n_dspl; - add_nops(insn_buff + 2, 3); + *next = skip_nops(instr, *next, len); + if (*target && *next == *target) + nop = *prev; - repl_len = 2; - goto done; + add_nop(instr + nop, *next - nop); + DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next); + return true; + } + + *target = 0; + return false; +} -five_byte_jmp: - n_dspl -= 5; +/* + * "noinline" to cause control flow change and thus invalidate I$ and + * cause refetch after modification. + */ +static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) +{ + int prev, target = 0; - insn_buff[0] = 0xe9; - *(s32 *)&insn_buff[1] = n_dspl; + for (int next, i = 0; i < len; i = next) { + struct insn insn; - repl_len = 5; + if (insn_decode_kernel(&insn, &instr[i])) + return; -done: + next = i + insn.length; - DPRINTK("final displ: 0x%08x, JMP 0x%lx", - n_dspl, (unsigned long)orig_insn + n_dspl + repl_len); + __optimize_nops(instr, len, &insn, &next, &prev, &target); + } } /* - * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90) + * In this context, "source" is where the instructions are placed in the + * section .altinstr_replacement, for example during kernel build by the + * toolchain. + * "Destination" is where the instructions are being patched in by this + * machinery. * - * @instr: instruction byte stream - * @instrlen: length of the above - * @off: offset within @instr where the first NOP has been detected + * The source offset is: * - * Return: number of NOPs found (and replaced). + * src_imm = target - src_next_ip (1) + * + * and the target offset is: + * + * dst_imm = target - dst_next_ip (2) + * + * so rework (1) as an expression for target like: + * + * target = src_imm + src_next_ip (1a) + * + * and substitute in (2) to get: + * + * dst_imm = (src_imm + src_next_ip) - dst_next_ip (3) + * + * Now, since the instruction stream is 'identical' at src and dst (it + * is being copied after all) it can be stated that: + * + * src_next_ip = src + ip_offset + * dst_next_ip = dst + ip_offset (4) + * + * Substitute (4) in (3) and observe ip_offset being cancelled out to + * obtain: + * + * dst_imm = src_imm + (src + ip_offset) - (dst + ip_offset) + * = src_imm + src - dst + ip_offset - ip_offset + * = src_imm + src - dst (5) + * + * IOW, only the relative displacement of the code block matters. */ -static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) -{ - unsigned long flags; - int i = off, nnops; - while (i < instrlen) { - if (instr[i] != 0x90) - break; +#define apply_reloc_n(n_, p_, d_) \ + do { \ + s32 v = *(s##n_ *)(p_); \ + v += (d_); \ + BUG_ON((v >> 31) != (v >> (n_-1))); \ + *(s##n_ *)(p_) = (s##n_)v; \ + } while (0) + - i++; +static __always_inline +void apply_reloc(int n, void *ptr, uintptr_t diff) +{ + switch (n) { + case 1: apply_reloc_n(8, ptr, diff); break; + case 2: apply_reloc_n(16, ptr, diff); break; + case 4: apply_reloc_n(32, ptr, diff); break; + default: BUG(); } +} - nnops = i - off; +static __always_inline +bool need_reloc(unsigned long offset, u8 *src, size_t src_len) +{ + u8 *target = src + offset; + /* + * If the target is inside the patched block, it's relative to the + * block itself and does not need relocation. + */ + return (target < src || target > src + src_len); +} - if (nnops <= 1) - return nnops; +static void __init_or_module noinline +apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) +{ + int prev, target = 0; - local_irq_save(flags); - add_nops(instr + off, nnops); - local_irq_restore(flags); + for (int next, i = 0; i < len; i = next) { + struct insn insn; - DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); + if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i]))) + return; - return nnops; -} + next = i + insn.length; -/* - * "noinline" to cause control flow change and thus invalidate I$ and - * cause refetch after modification. - */ -static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) -{ - struct insn insn; - int i = 0; + if (__optimize_nops(buf, len, &insn, &next, &prev, &target)) + continue; - /* - * Jump over the non-NOP insns and optimize single-byte NOPs into bigger - * ones. - */ - for (;;) { - if (insn_decode_kernel(&insn, &instr[i])) - return; + switch (insn.opcode.bytes[0]) { + case 0x0f: + if (insn.opcode.bytes[1] < 0x80 || + insn.opcode.bytes[1] > 0x8f) + break; - /* - * See if this and any potentially following NOPs can be - * optimized. - */ - if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) - i += optimize_nops_range(instr, len, i); - else - i += insn.length; + fallthrough; /* Jcc.d32 */ + case 0x70 ... 0x7f: /* Jcc.d8 */ + case JMP8_INSN_OPCODE: + case JMP32_INSN_OPCODE: + case CALL_INSN_OPCODE: + if (need_reloc(next + insn.immediate.value, src, src_len)) { + apply_reloc(insn.immediate.nbytes, + buf + i + insn_offset_immediate(&insn), + src - dest); + } - if (i >= len) - return; + /* + * Where possible, convert JMP.d32 into JMP.d8. + */ + if (insn.opcode.bytes[0] == JMP32_INSN_OPCODE) { + s32 imm = insn.immediate.value; + imm += src - dest; + imm += JMP32_INSN_SIZE - JMP8_INSN_SIZE; + if ((imm >> 31) == (imm >> 7)) { + buf[i+0] = JMP8_INSN_OPCODE; + buf[i+1] = (s8)imm; + + memset(&buf[i+2], INT3_INSN_OPCODE, insn.length - 2); + } + } + break; + } + + if (insn_rip_relative(&insn)) { + if (need_reloc(next + insn.displacement.value, src, src_len)) { + apply_reloc(insn.displacement.nbytes, + buf + i + insn_offset_displacement(&insn), + src - dest); + } + } } } @@ -270,7 +402,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 *instr, *replacement; u8 insn_buff[MAX_PATCH_LEN]; - DPRINTK("alt table %px, -> %px", start, end); + DPRINTK(ALT, "alt table %px, -> %px", start, end); /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. @@ -294,47 +426,31 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * - feature not present but ALT_FLAG_NOT is set to mean, * patch if feature is *NOT* present. */ - if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) - goto next; + if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { + optimize_nops(instr, a->instrlen); + continue; + } - DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)", + DPRINTK(ALT, "feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)", (a->flags & ALT_FLAG_NOT) ? "!" : "", a->cpuid >> 5, a->cpuid & 0x1f, instr, instr, a->instrlen, replacement, a->replacementlen); - DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); - DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); - memcpy(insn_buff, replacement, a->replacementlen); insn_buff_sz = a->replacementlen; - /* - * 0xe8 is a relative jump; fix the offset. - * - * Instruction length is checked before the opcode to avoid - * accessing uninitialized bytes for zero-length replacements. - */ - if (a->replacementlen == 5 && *insn_buff == 0xe8) { - *(s32 *)(insn_buff + 1) += replacement - instr; - DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", - *(s32 *)(insn_buff + 1), - (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5); - } - - if (a->replacementlen && is_jmp(replacement[0])) - recompute_jump(a, instr, replacement, insn_buff); - for (; insn_buff_sz < a->instrlen; insn_buff_sz++) insn_buff[insn_buff_sz] = 0x90; - DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); + apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen); - text_poke_early(instr, insn_buff, insn_buff_sz); + DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement); + DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr); -next: - optimize_nops(instr, a->instrlen); + text_poke_early(instr, insn_buff, insn_buff_sz); } } @@ -555,15 +671,15 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) continue; } - DPRINTK("retpoline at: %pS (%px) len: %d to: %pS", + DPRINTK(RETPOLINE, "retpoline at: %pS (%px) len: %d to: %pS", addr, addr, insn.length, addr + insn.length + insn.immediate.value); len = patch_retpoline(addr, &insn, bytes); if (len == insn.length) { optimize_nops(bytes, len); - DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); - DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr); text_poke_early(addr, bytes, len); } } @@ -590,13 +706,12 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; + /* Patch the custom return thunks... */ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { - if (x86_return_thunk == __x86_return_thunk) - return -1; - i = JMP32_INSN_SIZE; __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); } else { + /* ... or patch them out if not needed. */ bytes[i++] = RET_INSN_OPCODE; } @@ -609,6 +724,14 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; + /* + * Do not patch out the default return thunks if those needed are the + * ones generated by the compiler. + */ + if (cpu_feature_enabled(X86_FEATURE_RETHUNK) && + (x86_return_thunk == __x86_return_thunk)) + return; + for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; struct insn insn; @@ -630,14 +753,14 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) addr, dest, 5, addr)) continue; - DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + DPRINTK(RET, "return thunk at: %pS (%px) len: %d to: %pS", addr, addr, insn.length, addr + insn.length + insn.immediate.value); len = patch_return(addr, &insn, bytes); if (len == insn.length) { - DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); - DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr); text_poke_early(addr, bytes, len); } } @@ -655,7 +778,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #ifdef CONFIG_X86_KERNEL_IBT -static void poison_endbr(void *addr, bool warn) +static void __init_or_module poison_endbr(void *addr, bool warn) { u32 endbr, poison = gen_endbr_poison(); @@ -667,13 +790,13 @@ static void poison_endbr(void *addr, bool warn) return; } - DPRINTK("ENDBR at: %pS (%px)", addr, addr); + DPRINTK(ENDBR, "ENDBR at: %pS (%px)", addr, addr); /* * When we have IBT, the lack of ENDBR will trigger #CP */ - DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); - DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); + DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr); + DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr); text_poke_early(addr, &poison, 4); } @@ -1148,7 +1271,7 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, smp->locks_end = locks_end; smp->text = text; smp->text_end = text_end; - DPRINTK("locks %p -> %p, text %p -> %p, name %s\n", + DPRINTK(SMP, "locks %p -> %p, text %p -> %p, name %s\n", smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); @@ -1225,6 +1348,20 @@ int alternatives_text_reserved(void *start, void *end) #endif /* CONFIG_SMP */ #ifdef CONFIG_PARAVIRT + +/* Use this to add nops to a buffer, then text_poke the whole buffer. */ +static void __init_or_module add_nops(void *insns, unsigned int len) +{ + while (len > 0) { + unsigned int noplen = len; + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + memcpy(insns, x86_nops[noplen], noplen); + insns += noplen; + len -= noplen; + } +} + void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *end) { @@ -1332,6 +1469,35 @@ static noinline void __init int3_selftest(void) unregister_die_notifier(&int3_exception_nb); } +static __initdata int __alt_reloc_selftest_addr; + +__visible noinline void __init __alt_reloc_selftest(void *arg) +{ + WARN_ON(arg != &__alt_reloc_selftest_addr); +} + +static noinline void __init alt_reloc_selftest(void) +{ + /* + * Tests apply_relocation(). + * + * This has a relative immediate (CALL) in a place other than the first + * instruction and additionally on x86_64 we get a RIP-relative LEA: + * + * lea 0x0(%rip),%rdi # 5d0: R_X86_64_PC32 .init.data+0x5566c + * call +0 # 5d5: R_X86_64_PLT32 __alt_reloc_selftest-0x4 + * + * Getting this wrong will either crash and burn or tickle the WARN + * above. + */ + asm_inline volatile ( + ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS) + : /* output */ + : [mem] "m" (__alt_reloc_selftest_addr) + : _ASM_ARG1 + ); +} + void __init alternative_instructions(void) { int3_selftest(); @@ -1419,6 +1585,8 @@ void __init alternative_instructions(void) restart_nmi(); alternatives_patched = 1; + + alt_reloc_selftest(); } /** @@ -1799,7 +1967,7 @@ struct bp_patching_desc *try_get_desc(void) { struct bp_patching_desc *desc = &bp_desc; - if (!arch_atomic_inc_not_zero(&desc->refs)) + if (!raw_atomic_inc_not_zero(&desc->refs)) return NULL; return desc; @@ -1810,7 +1978,7 @@ static __always_inline void put_desc(void) struct bp_patching_desc *desc = &bp_desc; smp_mb__before_atomic(); - arch_atomic_dec(&desc->refs); + raw_atomic_dec(&desc->refs); } static __always_inline void *text_poke_addr(struct text_poke_loc *tp) @@ -1954,6 +2122,16 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries atomic_set_release(&bp_desc.refs, 1); /* + * Function tracing can enable thousands of places that need to be + * updated. This can take quite some time, and with full kernel debugging + * enabled, this could cause the softlockup watchdog to trigger. + * This function gets called every 256 entries added to be patched. + * Call cond_resched() here to make sure that other tasks can get scheduled + * while processing all the functions being patched. + */ + cond_resched(); + + /* * Corresponding read barrier in int3 notifier for making sure the * nr_entries and handler are correctly ordered wrt. patching. */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 7e331e8f3692..035a3db5330b 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -15,28 +15,31 @@ #include <linux/pci_ids.h> #include <asm/amd_nb.h> -#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 -#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 -#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 -#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 -#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5 -#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4 -#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8 -#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8 -#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 -#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec -#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 -#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c -#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 -#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728 -#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 -#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1 -#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 -#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d -#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e -#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4 -#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 -#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc +#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 +#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 +#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 +#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5 +#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4 +#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 +#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8 +#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8 +#define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb + +#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 +#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 +#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728 +#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1 +#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d +#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e +#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4 +#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 +#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc +#define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 /* Protect the PCI config register pairs used for SMN. */ static DEFINE_MUTEX(smn_mutex); @@ -53,6 +56,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) }, {} }; @@ -81,6 +85,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) }, {} }; @@ -101,6 +106,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, {} }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 770557110051..af49e24b46a4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -101,6 +101,9 @@ static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP; */ static bool virt_ext_dest_id __ro_after_init; +/* For parallel bootup. */ +unsigned long apic_mmio_base __ro_after_init; + /* * Map cpu index to physical APIC ID */ @@ -2163,6 +2166,7 @@ void __init register_lapic_address(unsigned long address) if (!x2apic_mode) { set_fixmap_nocache(FIX_APIC_BASE, address); + apic_mmio_base = APIC_BASE; apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", APIC_BASE, address); } @@ -2376,7 +2380,7 @@ static int nr_logical_cpuids = 1; /* * Used to store mapping between logical CPU IDs and APIC IDs. */ -static int cpuid_to_apicid[] = { +int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; @@ -2386,20 +2390,31 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) } #ifdef CONFIG_SMP -/** - * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread - * @apicid: APIC ID to check +static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) +{ + /* Isolate the SMT bit(s) in the APICID and check for 0 */ + u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; + + if (smp_num_siblings == 1 || !(apicid & mask)) + cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); +} + +/* + * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid + * during early boot. Initialize the primary thread mask before SMP + * bringup. */ -bool apic_id_is_primary_thread(unsigned int apicid) +static int __init smp_init_primary_thread_mask(void) { - u32 mask; + unsigned int cpu; - if (smp_num_siblings == 1) - return true; - /* Isolate the SMT bit(s) in the APICID and check for 0 */ - mask = (1U << (fls(smp_num_siblings) - 1)) - 1; - return !(apicid & mask); + for (cpu = 0; cpu < nr_logical_cpuids; cpu++) + cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); + return 0; } +early_initcall(smp_init_primary_thread_mask); +#else +static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif /* @@ -2544,6 +2559,9 @@ int generic_processor_info(int apicid, int version) set_cpu_present(cpu, true); num_processors++; + if (system_state != SYSTEM_BOOTING) + cpu_mark_primary_thread(cpu, apicid); + return cpu; } diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6bde05a86b4e..896bc41cb2ba 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -97,7 +97,10 @@ static void init_x2apic_ldr(void) static int x2apic_phys_probe(void) { - if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) + if (!x2apic_mode) + return 0; + + if (x2apic_phys || x2apic_fadt_phys()) return 1; return apic == &apic_x2apic_phys; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 482855227964..d9384d5b4b8e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -546,7 +546,6 @@ unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* The following values are used for the per node hub info struct */ -static __initdata unsigned short *_node_to_pnode; static __initdata unsigned short _min_socket, _max_socket; static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len; static __initdata struct uv_gam_range_entry *uv_gre_table; @@ -554,6 +553,7 @@ static __initdata struct uv_gam_parameters *uv_gp_table; static __initdata unsigned short *_socket_to_node; static __initdata unsigned short *_socket_to_pnode; static __initdata unsigned short *_pnode_to_socket; +static __initdata unsigned short *_node_to_socket; static __initdata struct uv_gam_range_s *_gr_table; @@ -617,7 +617,8 @@ static __init void build_uv_gr_table(void) bytes = _gr_table_len * sizeof(struct uv_gam_range_s); grt = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!grt); + if (WARN_ON_ONCE(!grt)) + return; _gr_table = grt; for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { @@ -1022,7 +1023,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, switch (index) { case UVY_MMIOH0: mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0; - nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; + nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK; n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; min_nasid = min_pnode; max_nasid = max_pnode; @@ -1030,7 +1031,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVY_MMIOH1: mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1; - nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; + nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK; n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH; min_nasid = min_pnode; max_nasid = max_pnode; @@ -1038,7 +1039,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVX_MMIOH0: mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0; - nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK; n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; min_nasid = min_pnode * 2; max_nasid = max_pnode * 2; @@ -1046,7 +1047,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVX_MMIOH1: mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1; - nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK; n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH; min_nasid = min_pnode * 2; max_nasid = max_pnode * 2; @@ -1072,8 +1073,9 @@ static void __init calc_mmioh_map(enum mmioh_arch index, /* Invalid NASID check */ if (nasid < min_nasid || max_nasid < nasid) { - pr_err("UV:%s:Invalid NASID:%x (range:%x..%x)\n", - __func__, index, min_nasid, max_nasid); + /* Not an error: unused table entries get "poison" values */ + pr_debug("UV:%s:Invalid NASID(%x):%x (range:%x..%x)\n", + __func__, index, nasid, min_nasid, max_nasid); nasid = -1; } @@ -1292,6 +1294,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi) hi->nasid_shift = uv_cpuid.nasid_shift; hi->min_pnode = _min_pnode; hi->min_socket = _min_socket; + hi->node_to_socket = _node_to_socket; hi->pnode_to_socket = _pnode_to_socket; hi->socket_to_node = _socket_to_node; hi->socket_to_pnode = _socket_to_pnode; @@ -1348,7 +1351,7 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr; unsigned long lgre = 0, gend = 0; int index = 0; - int sock_min = 999999, pnode_min = 99999; + int sock_min = INT_MAX, pnode_min = INT_MAX; int sock_max = -1, pnode_max = -1; uv_gre_table = gre; @@ -1459,11 +1462,37 @@ static int __init decode_uv_systab(void) return 0; } +/* + * Given a bitmask 'bits' representing presnt blades, numbered + * starting at 'base', masking off unused high bits of blade number + * with 'mask', update the minimum and maximum blade numbers that we + * have found. (Masking with 'mask' necessary because of BIOS + * treatment of system partitioning when creating this table we are + * interpreting.) + */ +static inline void blade_update_min_max(unsigned long bits, int base, int mask, int *min, int *max) +{ + int first, last; + + if (!bits) + return; + first = (base + __ffs(bits)) & mask; + last = (base + __fls(bits)) & mask; + + if (*min > first) + *min = first; + if (*max < last) + *max = last; +} + /* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) { unsigned long np; int i, uv_pb = 0; + int sock_min = INT_MAX, sock_max = -1, s_mask; + + s_mask = (1 << uv_cpuid.n_skt) - 1; if (UVH_NODE_PRESENT_TABLE) { pr_info("UV: NODE_PRESENT_DEPTH = %d\n", @@ -1471,35 +1500,82 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np); - uv_pb += hweight64(np); + blade_update_min_max(np, i * 64, s_mask, &sock_min, &sock_max); } } if (UVH_NODE_PRESENT_0) { np = uv_read_local_mmr(UVH_NODE_PRESENT_0); pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np); - uv_pb += hweight64(np); + blade_update_min_max(np, 0, s_mask, &sock_min, &sock_max); } if (UVH_NODE_PRESENT_1) { np = uv_read_local_mmr(UVH_NODE_PRESENT_1); pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np); - uv_pb += hweight64(np); + blade_update_min_max(np, 64, s_mask, &sock_min, &sock_max); + } + + /* Only update if we actually found some bits indicating blades present */ + if (sock_max >= sock_min) { + _min_socket = sock_min; + _max_socket = sock_max; + uv_pb = sock_max - sock_min + 1; } if (uv_possible_blades != uv_pb) uv_possible_blades = uv_pb; - pr_info("UV: number nodes/possible blades %d\n", uv_pb); + pr_info("UV: number nodes/possible blades %d (%d - %d)\n", + uv_pb, sock_min, sock_max); +} + +static int __init alloc_conv_table(int num_elem, unsigned short **table) +{ + int i; + size_t bytes; + + bytes = num_elem * sizeof(*table[0]); + *table = kmalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!*table)) + return -ENOMEM; + for (i = 0; i < num_elem; i++) + ((unsigned short *)*table)[i] = SOCK_EMPTY; + return 0; } +/* Remove conversion table if it's 1:1 */ +#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2) + +static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2) +{ + int i; + unsigned short *table = *tp; + + if (table == NULL) + return; + if (max != max2) + return; + for (i = 0; i < max; i++) { + if (i != table[i]) + return; + } + kfree(table); + *tp = NULL; + pr_info("UV: %s is 1:1, conversion table removed\n", tname); +} + +/* + * Build Socket Tables + * If the number of nodes is >1 per socket, socket to node table will + * contain lowest node number on that socket. + */ static void __init build_socket_tables(void) { struct uv_gam_range_entry *gre = uv_gre_table; - int num, nump; + int nums, numn, nump; int cpu, i, lnid; int minsock = _min_socket; int maxsock = _max_socket; int minpnode = _min_pnode; int maxpnode = _max_pnode; - size_t bytes; if (!gre) { if (is_uv2_hub() || is_uv3_hub()) { @@ -1507,39 +1583,36 @@ static void __init build_socket_tables(void) return; } pr_err("UV: Error: UVsystab address translations not available!\n"); - BUG(); + WARN_ON_ONCE(!gre); + return; } - /* Build socket id -> node id, pnode */ - num = maxsock - minsock + 1; - bytes = num * sizeof(_socket_to_node[0]); - _socket_to_node = kmalloc(bytes, GFP_KERNEL); - _socket_to_pnode = kmalloc(bytes, GFP_KERNEL); - + numn = num_possible_nodes(); nump = maxpnode - minpnode + 1; - bytes = nump * sizeof(_pnode_to_socket[0]); - _pnode_to_socket = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket); - - for (i = 0; i < num; i++) - _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY; - - for (i = 0; i < nump; i++) - _pnode_to_socket[i] = SOCK_EMPTY; + nums = maxsock - minsock + 1; + + /* Allocate and clear tables */ + if ((alloc_conv_table(nump, &_pnode_to_socket) < 0) + || (alloc_conv_table(nums, &_socket_to_pnode) < 0) + || (alloc_conv_table(numn, &_node_to_socket) < 0) + || (alloc_conv_table(nums, &_socket_to_node) < 0)) { + kfree(_pnode_to_socket); + kfree(_socket_to_pnode); + kfree(_node_to_socket); + return; + } /* Fill in pnode/node/addr conversion list values: */ - pr_info("UV: GAM Building socket/pnode conversion tables\n"); for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (gre->type == UV_GAM_RANGE_TYPE_HOLE) continue; i = gre->sockid - minsock; - /* Duplicate: */ - if (_socket_to_pnode[i] != SOCK_EMPTY) - continue; - _socket_to_pnode[i] = gre->pnode; + if (_socket_to_pnode[i] == SOCK_EMPTY) + _socket_to_pnode[i] = gre->pnode; i = gre->pnode - minpnode; - _pnode_to_socket[i] = gre->sockid; + if (_pnode_to_socket[i] == SOCK_EMPTY) + _pnode_to_socket[i] = gre->sockid; pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n", gre->sockid, gre->type, gre->nasid, @@ -1549,66 +1622,39 @@ static void __init build_socket_tables(void) /* Set socket -> node values: */ lnid = NUMA_NO_NODE; - for_each_present_cpu(cpu) { + for_each_possible_cpu(cpu) { int nid = cpu_to_node(cpu); int apicid, sockid; if (lnid == nid) continue; lnid = nid; + apicid = per_cpu(x86_cpu_to_apicid, cpu); sockid = apicid >> uv_cpuid.socketid_shift; - _socket_to_node[sockid - minsock] = nid; - pr_info("UV: sid:%02x: apicid:%04x node:%2d\n", - sockid, apicid, nid); - } - /* Set up physical blade to pnode translation from GAM Range Table: */ - bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]); - _node_to_pnode = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!_node_to_pnode); + if (_socket_to_node[sockid - minsock] == SOCK_EMPTY) + _socket_to_node[sockid - minsock] = nid; - for (lnid = 0; lnid < num_possible_nodes(); lnid++) { - unsigned short sockid; + if (_node_to_socket[nid] == SOCK_EMPTY) + _node_to_socket[nid] = sockid; - for (sockid = minsock; sockid <= maxsock; sockid++) { - if (lnid == _socket_to_node[sockid - minsock]) { - _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock]; - break; - } - } - if (sockid > maxsock) { - pr_err("UV: socket for node %d not found!\n", lnid); - BUG(); - } + pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n", + sockid, + apicid, + _node_to_socket[nid], + nid, + _socket_to_node[sockid - minsock]); } /* - * If socket id == pnode or socket id == node for all nodes, + * If e.g. socket id == pnode for all pnodes, * system runs faster by removing corresponding conversion table. */ - pr_info("UV: Checking socket->node/pnode for identity maps\n"); - if (minsock == 0) { - for (i = 0; i < num; i++) - if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i]) - break; - if (i >= num) { - kfree(_socket_to_node); - _socket_to_node = NULL; - pr_info("UV: 1:1 socket_to_node table removed\n"); - } - } - if (minsock == minpnode) { - for (i = 0; i < num; i++) - if (_socket_to_pnode[i] != SOCK_EMPTY && - _socket_to_pnode[i] != i + minpnode) - break; - if (i >= num) { - kfree(_socket_to_pnode); - _socket_to_pnode = NULL; - pr_info("UV: 1:1 socket_to_pnode table removed\n"); - } - } + FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn); + FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn); + FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump); + FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump); } /* Check which reboot to use */ @@ -1692,12 +1738,13 @@ static __init int uv_system_init_hubless(void) static void __init uv_system_init_hub(void) { struct uv_hub_info_s hub_info = {0}; - int bytes, cpu, nodeid; - unsigned short min_pnode = 9999, max_pnode = 0; + int bytes, cpu, nodeid, bid; + unsigned short min_pnode = USHRT_MAX, max_pnode = 0; char *hub = is_uv5_hub() ? "UV500" : is_uv4_hub() ? "UV400" : is_uv3_hub() ? "UV300" : is_uv2_hub() ? "UV2000/3000" : NULL; + struct uv_hub_info_s **uv_hub_info_list_blade; if (!hub) { pr_err("UV: Unknown/unsupported UV hub\n"); @@ -1720,9 +1767,12 @@ static void __init uv_system_init_hub(void) build_uv_gr_table(); set_block_size(); uv_init_hub_info(&hub_info); - uv_possible_blades = num_possible_nodes(); - if (!_node_to_pnode) + /* If UV2 or UV3 may need to get # blades from HW */ + if (is_uv(UV2|UV3) && !uv_gre_table) boot_init_possible_blades(&hub_info); + else + /* min/max sockets set in decode_gam_rng_tbl */ + uv_possible_blades = (_max_socket - _min_socket) + 1; /* uv_num_possible_blades() is really the hub count: */ pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus()); @@ -1731,79 +1781,98 @@ static void __init uv_system_init_hub(void) hub_info.coherency_domain_number = sn_coherency_id; uv_rtc_init(); + /* + * __uv_hub_info_list[] is indexed by node, but there is only + * one hub_info structure per blade. First, allocate one + * structure per blade. Further down we create a per-node + * table (__uv_hub_info_list[]) pointing to hub_info + * structures for the correct blade. + */ + bytes = sizeof(void *) * uv_num_possible_blades(); - __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!__uv_hub_info_list); + uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!uv_hub_info_list_blade)) + return; bytes = sizeof(struct uv_hub_info_s); - for_each_node(nodeid) { + for_each_possible_blade(bid) { struct uv_hub_info_s *new_hub; - if (__uv_hub_info_list[nodeid]) { - pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid); - BUG(); + /* Allocate & fill new per hub info list */ + new_hub = (bid == 0) ? &uv_hub_info_node0 + : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid)); + if (WARN_ON_ONCE(!new_hub)) { + /* do not kfree() bid 0, which is statically allocated */ + while (--bid > 0) + kfree(uv_hub_info_list_blade[bid]); + kfree(uv_hub_info_list_blade); + return; } - /* Allocate new per hub info list */ - new_hub = (nodeid == 0) ? &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid); - BUG_ON(!new_hub); - __uv_hub_info_list[nodeid] = new_hub; - new_hub = uv_hub_info_list(nodeid); - BUG_ON(!new_hub); + uv_hub_info_list_blade[bid] = new_hub; *new_hub = hub_info; /* Use information from GAM table if available: */ - if (_node_to_pnode) - new_hub->pnode = _node_to_pnode[nodeid]; + if (uv_gre_table) + new_hub->pnode = uv_blade_to_pnode(bid); else /* Or fill in during CPU loop: */ new_hub->pnode = 0xffff; - new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); + new_hub->numa_blade_id = bid; new_hub->memory_nid = NUMA_NO_NODE; new_hub->nr_possible_cpus = 0; new_hub->nr_online_cpus = 0; } + /* + * Now populate __uv_hub_info_list[] for each node with the + * pointer to the struct for the blade it resides on. + */ + + bytes = sizeof(void *) * num_possible_nodes(); + __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!__uv_hub_info_list)) { + for_each_possible_blade(bid) + /* bid 0 is statically allocated */ + if (bid != 0) + kfree(uv_hub_info_list_blade[bid]); + kfree(uv_hub_info_list_blade); + return; + } + + for_each_node(nodeid) + __uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)]; + /* Initialize per CPU info: */ for_each_possible_cpu(cpu) { - int apicid = per_cpu(x86_cpu_to_apicid, cpu); - int numa_node_id; + int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); + unsigned short bid; unsigned short pnode; - nodeid = cpu_to_node(cpu); - numa_node_id = numa_cpu_node(cpu); pnode = uv_apicid_to_pnode(apicid); + bid = uv_pnode_to_socket(pnode) - _min_socket; - uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); + uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid]; uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++; if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE) uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); - /* Init memoryless node: */ - if (nodeid != numa_node_id && - uv_hub_info_list(numa_node_id)->pnode == 0xffff) - uv_hub_info_list(numa_node_id)->pnode = pnode; - else if (uv_cpu_hub_info(cpu)->pnode == 0xffff) + if (uv_cpu_hub_info(cpu)->pnode == 0xffff) uv_cpu_hub_info(cpu)->pnode = pnode; } - for_each_node(nodeid) { - unsigned short pnode = uv_hub_info_list(nodeid)->pnode; + for_each_possible_blade(bid) { + unsigned short pnode = uv_hub_info_list_blade[bid]->pnode; - /* Add pnode info for pre-GAM list nodes without CPUs: */ - if (pnode == 0xffff) { - unsigned long paddr; + if (pnode == 0xffff) + continue; - paddr = node_start_pfn(nodeid) << PAGE_SHIFT; - pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); - uv_hub_info_list(nodeid)->pnode = pnode; - } min_pnode = min(pnode, min_pnode); max_pnode = max(pnode, max_pnode); - pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n", - nodeid, - uv_hub_info_list(nodeid)->pnode, - uv_hub_info_list(nodeid)->nr_possible_cpus); + pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n", + bid, + uv_hub_info_list_blade[bid]->pnode, + uv_hub_info_list_blade[bid]->nr_possible_cpus); } pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode); @@ -1811,6 +1880,9 @@ static void __init uv_system_init_hub(void) map_mmr_high(max_pnode); map_mmioh_high(min_pnode, max_pnode); + kfree(uv_hub_info_list_blade); + uv_hub_info_list_blade = NULL; + uv_nmi_setup(); uv_cpu_init(); uv_setup_proc_files(0); diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 22ab13966427..c06bfc086565 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -133,8 +133,8 @@ static bool skip_addr(void *dest) /* Accounts directly */ if (dest == ret_from_fork) return true; -#ifdef CONFIG_HOTPLUG_CPU - if (dest == start_cpu0) +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT) + if (dest == soft_restart_cpu) return true; #endif #ifdef CONFIG_FUNCTION_TRACER @@ -293,7 +293,8 @@ void *callthunks_translate_call_dest(void *dest) return target ? : dest; } -bool is_callthunk(void *addr) +#ifdef CONFIG_BPF_JIT +static bool is_callthunk(void *addr) { unsigned int tmpl_size = SKL_TMPL_SIZE; void *tmpl = skl_call_thunk_template; @@ -306,7 +307,6 @@ bool is_callthunk(void *addr) return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size); } -#ifdef CONFIG_BPF_JIT int x86_call_depth_emit_accounting(u8 **pprog, void *func) { unsigned int tmpl_size = SKL_TMPL_SIZE; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d7e3ceaf75c1..4350f6bfc064 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -27,7 +27,7 @@ obj-y += cpuid-deps.o obj-y += umwait.o obj-$(CONFIG_PROC_FS) += proc.o -obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o +obj-y += capflags.o powerflags.o obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o ifdef CONFIG_CPU_SUP_INTEL @@ -54,7 +54,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o obj-$(CONFIG_ACRN_GUEST) += acrn.o -ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^ @@ -63,5 +62,4 @@ vmxfeature = $(src)/../../include/asm/vmxfeatures.h $(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) -endif targets += capflags.c diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 182af64387d0..9e2a91830f72 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -9,7 +9,6 @@ * - Andrew D. Balsa (code cleanup). */ #include <linux/init.h> -#include <linux/utsname.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/nospec.h> @@ -27,8 +26,6 @@ #include <asm/msr.h> #include <asm/vmx.h> #include <asm/paravirt.h> -#include <asm/alternative.h> -#include <asm/set_memory.h> #include <asm/intel-family.h> #include <asm/e820/api.h> #include <asm/hypervisor.h> @@ -125,21 +122,8 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); EXPORT_SYMBOL_GPL(mmio_stale_data_clear); -void __init check_bugs(void) +void __init cpu_select_mitigations(void) { - identify_boot_cpu(); - - /* - * identify_boot_cpu() initialized SMT support information, let the - * core code know. - */ - cpu_smt_check_topology(); - - if (!IS_ENABLED(CONFIG_SMP)) { - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); - } - /* * Read the SPEC_CTRL MSR to account for reserved bits which may * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD @@ -176,39 +160,6 @@ void __init check_bugs(void) md_clear_select_mitigation(); srbds_select_mitigation(); l1d_flush_select_mitigation(); - - arch_smt_update(); - -#ifdef CONFIG_X86_32 - /* - * Check whether we are able to run this kernel safely on SMP. - * - * - i386 is no longer supported. - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. - */ - if (boot_cpu_data.x86 < 4) - panic("Kernel requires i486+ for 'invlpg' and other features"); - - init_utsname()->machine[1] = - '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); - alternative_instructions(); - - fpu__init_check_bugs(); -#else /* CONFIG_X86_64 */ - alternative_instructions(); - - /* - * Make sure the first 2MB area is not mapped by huge pages - * There are typically fixed size MTRRs in there and overlapping - * MTRRs into large pages causes slow downs. - * - * Right now we don't do that with gbpages because there seems - * very little benefit for that case. - */ - if (!direct_gbpages) - set_memory_4k((unsigned long)__va(0), 1); -#endif } /* diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 4063e8991211..8f86eacf69f7 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -39,6 +39,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Shared L2 cache maps */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); +static cpumask_var_t cpu_cacheinfo_mask; + /* Kernel controls MTRR and/or PAT MSRs. */ unsigned int memory_caching_control __ro_after_init; @@ -1172,8 +1174,10 @@ void cache_bp_restore(void) cache_cpu_init(); } -static int cache_ap_init(unsigned int cpu) +static int cache_ap_online(unsigned int cpu) { + cpumask_set_cpu(cpu, cpu_cacheinfo_mask); + if (!memory_caching_control || get_cache_aps_delayed_init()) return 0; @@ -1191,11 +1195,17 @@ static int cache_ap_init(unsigned int cpu) * lock to prevent MTRR entry changes */ stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL, - cpu_callout_mask); + cpu_cacheinfo_mask); return 0; } +static int cache_ap_offline(unsigned int cpu) +{ + cpumask_clear_cpu(cpu, cpu_cacheinfo_mask); + return 0; +} + /* * Delayed cache initialization for all AP's */ @@ -1210,9 +1220,12 @@ void cache_aps_init(void) static int __init cache_ap_register(void) { + zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL); + cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask); + cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING, "x86/cachectrl:starting", - cache_ap_init, NULL); + cache_ap_online, cache_ap_offline); return 0; } -core_initcall(cache_ap_register); +early_initcall(cache_ap_register); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80710a68ef7d..52683fddafaf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -18,12 +18,16 @@ #include <linux/init.h> #include <linux/kprobes.h> #include <linux/kgdb.h> +#include <linux/mem_encrypt.h> #include <linux/smp.h> +#include <linux/cpu.h> #include <linux/io.h> #include <linux/syscore_ops.h> #include <linux/pgtable.h> #include <linux/stackprotector.h> +#include <linux/utsname.h> +#include <asm/alternative.h> #include <asm/cmdline.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> @@ -59,7 +63,7 @@ #include <asm/intel-family.h> #include <asm/cpu_device_id.h> #include <asm/uv/uv.h> -#include <asm/sigframe.h> +#include <asm/set_memory.h> #include <asm/traps.h> #include <asm/sev.h> @@ -67,14 +71,6 @@ u32 elf_hwcap2 __read_mostly; -/* all of these masks are initialized in setup_cpu_local_masks() */ -cpumask_var_t cpu_initialized_mask; -cpumask_var_t cpu_callout_mask; -cpumask_var_t cpu_callin_mask; - -/* representing cpus for which sibling maps can be computed */ -cpumask_var_t cpu_sibling_setup_mask; - /* Number of siblings per CPU package */ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); @@ -169,15 +165,6 @@ clear_ppin: clear_cpu_cap(c, info->feature); } -/* correctly size the local cpu masks */ -void __init setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - static void default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 @@ -1502,12 +1489,10 @@ static void __init cpu_parse_early_param(void) if (!kstrtouint(opt, 10, &bit)) { if (bit < NCAPINTS * 32) { -#ifdef CONFIG_X86_FEATURE_NAMES /* empty-string, i.e., ""-defined feature flags */ if (!x86_cap_flags[bit]) pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit)); else -#endif pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); setup_clear_cpu_cap(bit); @@ -1520,7 +1505,6 @@ static void __init cpu_parse_early_param(void) continue; } -#ifdef CONFIG_X86_FEATURE_NAMES for (bit = 0; bit < 32 * NCAPINTS; bit++) { if (!x86_cap_flag(bit)) continue; @@ -1537,7 +1521,6 @@ static void __init cpu_parse_early_param(void) if (!found) pr_cont(" (unknown: %s)", opt); -#endif } pr_cont("\n"); @@ -1600,10 +1583,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) sld_setup(c); - fpu__init_system(c); - - init_sigframe_size(); - #ifdef CONFIG_X86_32 /* * Regardless of whether PCID is enumerated, the SDM says @@ -2123,19 +2102,6 @@ static void dbg_restore_debug_regs(void) #define dbg_restore_debug_regs() #endif /* ! CONFIG_KGDB */ -static void wait_for_master_cpu(int cpu) -{ -#ifdef CONFIG_SMP - /* - * wait for ACK from master CPU before continuing - * with AP initialization - */ - WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); - while (!cpumask_test_cpu(cpu, cpu_callout_mask)) - cpu_relax(); -#endif -} - static inline void setup_getcpu(int cpu) { unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu)); @@ -2158,11 +2124,7 @@ static inline void setup_getcpu(int cpu) } #ifdef CONFIG_X86_64 -static inline void ucode_cpu_init(int cpu) -{ - if (cpu) - load_ucode_ap(); -} +static inline void ucode_cpu_init(int cpu) { } static inline void tss_setup_ist(struct tss_struct *tss) { @@ -2239,8 +2201,6 @@ void cpu_init(void) struct task_struct *cur = current; int cpu = raw_smp_processor_id(); - wait_for_master_cpu(cpu); - ucode_cpu_init(cpu); #ifdef CONFIG_NUMA @@ -2285,26 +2245,12 @@ void cpu_init(void) doublefault_init_cpu_tss(); - fpu__init_cpu(); - if (is_uv_system()) uv_cpu_init(); load_fixmap_gdt(cpu); } -#ifdef CONFIG_SMP -void cpu_init_secondary(void) -{ - /* - * Relies on the BP having set-up the IDT tables, which are loaded - * on this CPU in cpu_init_exception_handling(). - */ - cpu_init_exception_handling(); - cpu_init(); -} -#endif - #ifdef CONFIG_MICROCODE_LATE_LOADING /** * store_cpu_caps() - Store a snapshot of CPU capabilities @@ -2362,3 +2308,69 @@ void arch_smt_update(void) /* Check whether IPI broadcasting can be enabled */ apic_smt_update(); } + +void __init arch_cpu_finalize_init(void) +{ + identify_boot_cpu(); + + /* + * identify_boot_cpu() initialized SMT support information, let the + * core code know. + */ + cpu_smt_check_topology(); + + if (!IS_ENABLED(CONFIG_SMP)) { + pr_info("CPU: "); + print_cpu_info(&boot_cpu_data); + } + + cpu_select_mitigations(); + + arch_smt_update(); + + if (IS_ENABLED(CONFIG_X86_32)) { + /* + * Check whether this is a real i386 which is not longer + * supported and fixup the utsname. + */ + if (boot_cpu_data.x86 < 4) + panic("Kernel requires i486+ for 'invlpg' and other features"); + + init_utsname()->machine[1] = + '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + } + + /* + * Must be before alternatives because it might set or clear + * feature bits. + */ + fpu__init_system(); + fpu__init_cpu(); + + alternative_instructions(); + + if (IS_ENABLED(CONFIG_X86_64)) { + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. + */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); + } else { + fpu__init_check_bugs(); + } + + /* + * This needs to be called before any devices perform DMA + * operations that might use the SWIOTLB bounce buffers. It will + * mark the bounce buffers as decrypted so that their usage will + * not cause "plain-text" data to be decrypted when accessed. It + * must be called after late_time_init() so that Hyper-V x86/x64 + * hypercalls work when the SWIOTLB bounce buffers are decrypted. + */ + mem_encrypt_init(); +} diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f97b0fe13da8..1c44630d4789 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -79,6 +79,7 @@ extern void detect_ht(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); +void cpu_select_mitigations(void); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 0b971f974096..5e74610b39e7 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -715,11 +715,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) bool amd_mce_is_memory_error(struct mce *m) { + enum smca_bank_types bank_type; /* ErrCodeExt[20:16] */ u8 xec = (m->status >> 16) & 0x1f; + bank_type = smca_get_bank_type(m->extcpu, m->bank); if (mce_flags.smca) - return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0; + return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0; return m->bank == 4 && xec == 0x8; } @@ -1050,7 +1052,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol if (bank_type >= N_SMCA_BANK_TYPES) return NULL; - if (b && bank_type == SMCA_UMC) { + if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; return NULL; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 2eec60f50057..89e2aab5d34d 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1022,12 +1022,12 @@ static noinstr int mce_start(int *no_way_out) if (!timeout) return ret; - arch_atomic_add(*no_way_out, &global_nwo); + raw_atomic_add(*no_way_out, &global_nwo); /* * Rely on the implied barrier below, such that global_nwo * is updated before mce_callin. */ - order = arch_atomic_inc_return(&mce_callin); + order = raw_atomic_inc_return(&mce_callin); arch_cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus); /* Enable instrumentation around calls to external facilities */ @@ -1036,10 +1036,10 @@ static noinstr int mce_start(int *no_way_out) /* * Wait for everyone. */ - while (arch_atomic_read(&mce_callin) != num_online_cpus()) { + while (raw_atomic_read(&mce_callin) != num_online_cpus()) { if (mce_timed_out(&timeout, "Timeout: Not all CPUs entered broadcast exception handler")) { - arch_atomic_set(&global_nwo, 0); + raw_atomic_set(&global_nwo, 0); goto out; } ndelay(SPINUNIT); @@ -1054,7 +1054,7 @@ static noinstr int mce_start(int *no_way_out) /* * Monarch: Starts executing now, the others wait. */ - arch_atomic_set(&mce_executing, 1); + raw_atomic_set(&mce_executing, 1); } else { /* * Subject: Now start the scanning loop one by one in @@ -1062,10 +1062,10 @@ static noinstr int mce_start(int *no_way_out) * This way when there are any shared banks it will be * only seen by one CPU before cleared, avoiding duplicates. */ - while (arch_atomic_read(&mce_executing) < order) { + while (raw_atomic_read(&mce_executing) < order) { if (mce_timed_out(&timeout, "Timeout: Subject CPUs unable to finish machine check processing")) { - arch_atomic_set(&global_nwo, 0); + raw_atomic_set(&global_nwo, 0); goto out; } ndelay(SPINUNIT); @@ -1075,7 +1075,7 @@ static noinstr int mce_start(int *no_way_out) /* * Cache the global no_way_out state. */ - *no_way_out = arch_atomic_read(&global_nwo); + *no_way_out = raw_atomic_read(&global_nwo); ret = order; @@ -1533,7 +1533,7 @@ noinstr void do_machine_check(struct pt_regs *regs) /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); - if (kill_current_task) + if (!mce_usable_address(&m)) queue_task_work(&m, msg, kill_me_now); else queue_task_work(&m, msg, kill_me_maybe); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index f5fdeb1e3606..87208e46f7ed 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -78,8 +78,6 @@ static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig) if (sig == e->installed_cpu) return e->equiv_cpu; - - e++; } return 0; } @@ -596,11 +594,6 @@ void reload_ucode_amd(unsigned int cpu) } } } -static u16 __find_equiv_id(unsigned int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - return find_equiv_id(&equiv_table, uci->cpu_sig.sig); -} /* * a small, trivial cache of per-family ucode patches @@ -651,9 +644,11 @@ static void free_cache(void) static struct ucode_patch *find_patch(unsigned int cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; u16 equiv_id; - equiv_id = __find_equiv_id(cpu); + + equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig); if (!equiv_id) return NULL; @@ -705,7 +700,7 @@ static enum ucode_state apply_microcode_amd(int cpu) rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); /* need to apply patch? */ - if (rev >= mc_amd->hdr.patch_id) { + if (rev > mc_amd->hdr.patch_id) { ret = UCODE_OK; goto out; } diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index cc4f9f1cb94c..aee4bc5ad496 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := mtrr.o if.o generic.o cleanup.o -obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o +obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o legacy.o diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index eff6ac62c0ff..ef3e8e42b782 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -110,7 +110,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) } const struct mtrr_ops amd_mtrr_ops = { - .vendor = X86_VENDOR_AMD, + .var_regs = 2, .set = amd_set_mtrr, .get = amd_get_mtrr, .get_free_region = generic_get_free_region, diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index b8a74eddde83..6f6c3ae92943 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -45,15 +45,6 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) return -ENOSPC; } -/* - * Report boot time MCR setups - */ -void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ - centaur_mcr[mcr].low = lo; - centaur_mcr[mcr].high = hi; -} - static void centaur_get_mcr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type * type) @@ -112,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t } const struct mtrr_ops centaur_mtrr_ops = { - .vendor = X86_VENDOR_CENTAUR, + .var_regs = 8, .set = centaur_set_mcr, .get = centaur_get_mcr, .get_free_region = centaur_get_free_region, diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index b5f43049fa5f..18cf79d6e2c5 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -55,9 +55,6 @@ static int __initdata nr_range; static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; -static int __initdata debug_print; -#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0) - #define BIOS_BUG_MSG \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" @@ -79,12 +76,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, base, base + size); } - if (debug_print) { - pr_debug("After WB checking\n"); - for (i = 0; i < nr_range; i++) - pr_debug("MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } + + Dprintk("After WB checking\n"); + for (i = 0; i < nr_range; i++) + Dprintk("MTRR MAP PFN: %016llx - %016llx\n", + range[i].start, range[i].end); /* Take out UC ranges: */ for (i = 0; i < num_var_ranges; i++) { @@ -112,24 +108,22 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, subtract_range(range, RANGE_NUM, extra_remove_base, extra_remove_base + extra_remove_size); - if (debug_print) { - pr_debug("After UC checking\n"); - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - pr_debug("MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } + Dprintk("After UC checking\n"); + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + Dprintk("MTRR MAP PFN: %016llx - %016llx\n", + range[i].start, range[i].end); } /* sort the ranges */ nr_range = clean_sort_range(range, RANGE_NUM); - if (debug_print) { - pr_debug("After sorting\n"); - for (i = 0; i < nr_range; i++) - pr_debug("MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } + + Dprintk("After sorting\n"); + for (i = 0; i < nr_range; i++) + Dprintk("MTRR MAP PFN: %016llx - %016llx\n", + range[i].start, range[i].end); return nr_range; } @@ -164,16 +158,9 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); -static int __init mtrr_cleanup_debug_setup(char *str) -{ - debug_print = 1; - return 0; -} -early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); - static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned int address_bits) + unsigned char type) { u32 base_lo, base_hi, mask_lo, mask_hi; u64 base, mask; @@ -183,7 +170,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, return; } - mask = (1ULL << address_bits) - 1; + mask = (1ULL << boot_cpu_data.x86_phys_bits) - 1; mask &= ~((((u64)sizek) << 10) - 1); base = ((u64)basek) << 10; @@ -209,7 +196,7 @@ save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, range_state[reg].type = type; } -static void __init set_var_mtrr_all(unsigned int address_bits) +static void __init set_var_mtrr_all(void) { unsigned long basek, sizek; unsigned char type; @@ -220,7 +207,7 @@ static void __init set_var_mtrr_all(unsigned int address_bits) sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); type = range_state[reg].type; - set_var_mtrr(reg, basek, sizek, type, address_bits); + set_var_mtrr(reg, basek, sizek, type); } } @@ -267,7 +254,7 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1UL << align; - if (debug_print) { + if (mtrr_debug) { char start_factor = 'K', size_factor = 'K'; unsigned long start_base, size_base; @@ -542,7 +529,7 @@ static void __init print_out_mtrr_range_state(void) start_base = to_size_factor(start_base, &start_factor); type = range_state[i].type; - pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + Dprintk("reg %d, base: %ld%cB, range: %ld%cB, type %s\n", i, start_base, start_factor, size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE) ? "UC" : @@ -680,7 +667,7 @@ static int __init mtrr_search_optimal_index(void) return index_good; } -int __init mtrr_cleanup(unsigned address_bits) +int __init mtrr_cleanup(void) { unsigned long x_remove_base, x_remove_size; unsigned long base, size, def, dummy; @@ -689,7 +676,10 @@ int __init mtrr_cleanup(unsigned address_bits) int index_good; int i; - if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) + if (!mtrr_enabled()) + return 0; + + if (!cpu_feature_enabled(X86_FEATURE_MTRR) || enable_mtrr_cleanup < 1) return 0; rdmsr(MSR_MTRRdefType, def, dummy); @@ -711,7 +701,7 @@ int __init mtrr_cleanup(unsigned address_bits) return 0; /* Print original var MTRRs at first, for debugging: */ - pr_debug("original variable MTRRs\n"); + Dprintk("original variable MTRRs\n"); print_out_mtrr_range_state(); memset(range, 0, sizeof(range)); @@ -742,8 +732,8 @@ int __init mtrr_cleanup(unsigned address_bits) mtrr_print_out_one_result(i); if (!result[i].bad) { - set_var_mtrr_all(address_bits); - pr_debug("New variable MTRRs\n"); + set_var_mtrr_all(); + Dprintk("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } @@ -763,7 +753,7 @@ int __init mtrr_cleanup(unsigned address_bits) mtrr_calc_range_state(chunk_size, gran_size, x_remove_base, x_remove_size, i); - if (debug_print) { + if (mtrr_debug) { mtrr_print_out_one_result(i); pr_info("\n"); } @@ -786,8 +776,8 @@ int __init mtrr_cleanup(unsigned address_bits) gran_size = result[i].gran_sizek; gran_size <<= 10; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); - set_var_mtrr_all(address_bits); - pr_debug("New variable MTRRs\n"); + set_var_mtrr_all(); + Dprintk("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } else { @@ -802,7 +792,7 @@ int __init mtrr_cleanup(unsigned address_bits) return 0; } #else -int __init mtrr_cleanup(unsigned address_bits) +int __init mtrr_cleanup(void) { return 0; } @@ -882,15 +872,18 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; + if (!mtrr_enabled()) + return 0; + /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: */ - if (!is_cpu(INTEL) || disable_mtrr_trim) + if (!cpu_feature_enabled(X86_FEATURE_MTRR) || disable_mtrr_trim) return 0; rdmsr(MSR_MTRRdefType, def, dummy); - def &= 0xff; + def &= MTRR_DEF_TYPE_TYPE; if (def != MTRR_TYPE_UNCACHABLE) return 0; diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 173b9e01e623..238dad57d4d6 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -235,7 +235,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, } const struct mtrr_ops cyrix_mtrr_ops = { - .vendor = X86_VENDOR_CYRIX, + .var_regs = 8, .set = cyrix_set_arr, .get = cyrix_get_arr, .get_free_region = cyrix_get_free_region, diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index ee09d359e08f..2d6aa5d2e3d7 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -8,10 +8,12 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/mm.h> - +#include <linux/cc_platform.h> #include <asm/processor-flags.h> #include <asm/cacheinfo.h> #include <asm/cpufeature.h> +#include <asm/hypervisor.h> +#include <asm/mshyperv.h> #include <asm/tlbflush.h> #include <asm/mtrr.h> #include <asm/msr.h> @@ -31,6 +33,55 @@ static struct fixed_range_block fixed_range_blocks[] = { {} }; +struct cache_map { + u64 start; + u64 end; + u64 flags; + u64 type:8; + u64 fixed:1; +}; + +bool mtrr_debug; + +static int __init mtrr_param_setup(char *str) +{ + int rc = 0; + + if (!str) + return -EINVAL; + if (!strcmp(str, "debug")) + mtrr_debug = true; + else + rc = -EINVAL; + + return rc; +} +early_param("mtrr", mtrr_param_setup); + +/* + * CACHE_MAP_MAX is the maximum number of memory ranges in cache_map, where + * no 2 adjacent ranges have the same cache mode (those would be merged). + * The number is based on the worst case: + * - no two adjacent fixed MTRRs share the same cache mode + * - one variable MTRR is spanning a huge area with mode WB + * - 255 variable MTRRs with mode UC all overlap with the WB MTRR, creating 2 + * additional ranges each (result like "ababababa...aba" with a = WB, b = UC), + * accounting for MTRR_MAX_VAR_RANGES * 2 - 1 range entries + * - a TOP_MEM2 area (even with overlapping an UC MTRR can't add 2 range entries + * to the possible maximum, as it always starts at 4GB, thus it can't be in + * the middle of that MTRR, unless that MTRR starts at 0, which would remove + * the initial "a" from the "abababa" pattern above) + * The map won't contain ranges with no matching MTRR (those fall back to the + * default cache mode). + */ +#define CACHE_MAP_MAX (MTRR_NUM_FIXED_RANGES + MTRR_MAX_VAR_RANGES * 2) + +static struct cache_map init_cache_map[CACHE_MAP_MAX] __initdata; +static struct cache_map *cache_map __refdata = init_cache_map; +static unsigned int cache_map_size = CACHE_MAP_MAX; +static unsigned int cache_map_n; +static unsigned int cache_map_fixed; + static unsigned long smp_changes_mask; static int mtrr_state_set; u64 mtrr_tom2; @@ -38,6 +89,9 @@ u64 mtrr_tom2; struct mtrr_state_type mtrr_state; EXPORT_SYMBOL_GPL(mtrr_state); +/* Reserved bits in the high portion of the MTRRphysBaseN MSR. */ +u32 phys_hi_rsvd; + /* * BIOS is expected to clear MtrrFixDramModEn bit, see for example * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD @@ -69,175 +123,370 @@ static u64 get_mtrr_size(u64 mask) { u64 size; - mask >>= PAGE_SHIFT; - mask |= size_or_mask; + mask |= (u64)phys_hi_rsvd << 32; size = -mask; - size <<= PAGE_SHIFT; + return size; } +static u8 get_var_mtrr_state(unsigned int reg, u64 *start, u64 *size) +{ + struct mtrr_var_range *mtrr = mtrr_state.var_ranges + reg; + + if (!(mtrr->mask_lo & MTRR_PHYSMASK_V)) + return MTRR_TYPE_INVALID; + + *start = (((u64)mtrr->base_hi) << 32) + (mtrr->base_lo & PAGE_MASK); + *size = get_mtrr_size((((u64)mtrr->mask_hi) << 32) + + (mtrr->mask_lo & PAGE_MASK)); + + return mtrr->base_lo & MTRR_PHYSBASE_TYPE; +} + +static u8 get_effective_type(u8 type1, u8 type2) +{ + if (type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE) + return MTRR_TYPE_UNCACHABLE; + + if ((type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH) || + (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK)) + return MTRR_TYPE_WRTHROUGH; + + if (type1 != type2) + return MTRR_TYPE_UNCACHABLE; + + return type1; +} + +static void rm_map_entry_at(int idx) +{ + cache_map_n--; + if (cache_map_n > idx) { + memmove(cache_map + idx, cache_map + idx + 1, + sizeof(*cache_map) * (cache_map_n - idx)); + } +} + /* - * Check and return the effective type for MTRR-MTRR type overlap. - * Returns 1 if the effective type is UNCACHEABLE, else returns 0 + * Add an entry into cache_map at a specific index. Merges adjacent entries if + * appropriate. Return the number of merges for correcting the scan index + * (this is needed as merging will reduce the number of entries, which will + * result in skipping entries in future iterations if the scan index isn't + * corrected). + * Note that the corrected index can never go below -1 (resulting in being 0 in + * the next scan iteration), as "2" is returned only if the current index is + * larger than zero. */ -static int check_type_overlap(u8 *prev, u8 *curr) +static int add_map_entry_at(u64 start, u64 end, u8 type, int idx) { - if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) { - *prev = MTRR_TYPE_UNCACHABLE; - *curr = MTRR_TYPE_UNCACHABLE; - return 1; + bool merge_prev = false, merge_next = false; + + if (start >= end) + return 0; + + if (idx > 0) { + struct cache_map *prev = cache_map + idx - 1; + + if (!prev->fixed && start == prev->end && type == prev->type) + merge_prev = true; } - if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) || - (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) { - *prev = MTRR_TYPE_WRTHROUGH; - *curr = MTRR_TYPE_WRTHROUGH; + if (idx < cache_map_n) { + struct cache_map *next = cache_map + idx; + + if (!next->fixed && end == next->start && type == next->type) + merge_next = true; } - if (*prev != *curr) { - *prev = MTRR_TYPE_UNCACHABLE; - *curr = MTRR_TYPE_UNCACHABLE; + if (merge_prev && merge_next) { + cache_map[idx - 1].end = cache_map[idx].end; + rm_map_entry_at(idx); + return 2; + } + if (merge_prev) { + cache_map[idx - 1].end = end; return 1; } + if (merge_next) { + cache_map[idx].start = start; + return 1; + } + + /* Sanity check: the array should NEVER be too small! */ + if (cache_map_n == cache_map_size) { + WARN(1, "MTRR cache mode memory map exhausted!\n"); + cache_map_n = cache_map_fixed; + return 0; + } + + if (cache_map_n > idx) { + memmove(cache_map + idx + 1, cache_map + idx, + sizeof(*cache_map) * (cache_map_n - idx)); + } + + cache_map[idx].start = start; + cache_map[idx].end = end; + cache_map[idx].type = type; + cache_map[idx].fixed = 0; + cache_map_n++; return 0; } -/** - * mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries - * - * Return the MTRR fixed memory type of 'start'. - * - * MTRR fixed entries are divided into the following ways: - * 0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges - * 0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges - * 0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges - * - * Return Values: - * MTRR_TYPE_(type) - Matched memory type - * MTRR_TYPE_INVALID - Unmatched +/* Clear a part of an entry. Return 1 if start of entry is still valid. */ +static int clr_map_range_at(u64 start, u64 end, int idx) +{ + int ret = start != cache_map[idx].start; + u64 tmp; + + if (start == cache_map[idx].start && end == cache_map[idx].end) { + rm_map_entry_at(idx); + } else if (start == cache_map[idx].start) { + cache_map[idx].start = end; + } else if (end == cache_map[idx].end) { + cache_map[idx].end = start; + } else { + tmp = cache_map[idx].end; + cache_map[idx].end = start; + add_map_entry_at(end, tmp, cache_map[idx].type, idx + 1); + } + + return ret; +} + +/* + * Add MTRR to the map. The current map is scanned and each part of the MTRR + * either overlapping with an existing entry or with a hole in the map is + * handled separately. */ -static u8 mtrr_type_lookup_fixed(u64 start, u64 end) +static void add_map_entry(u64 start, u64 end, u8 type) { - int idx; + u8 new_type, old_type; + u64 tmp; + int i; - if (start >= 0x100000) - return MTRR_TYPE_INVALID; + for (i = 0; i < cache_map_n && start < end; i++) { + if (start >= cache_map[i].end) + continue; + + if (start < cache_map[i].start) { + /* Region start has no overlap. */ + tmp = min(end, cache_map[i].start); + i -= add_map_entry_at(start, tmp, type, i); + start = tmp; + continue; + } - /* 0x0 - 0x7FFFF */ - if (start < 0x80000) { - idx = 0; - idx += (start >> 16); - return mtrr_state.fixed_ranges[idx]; - /* 0x80000 - 0xBFFFF */ - } else if (start < 0xC0000) { - idx = 1 * 8; - idx += ((start - 0x80000) >> 14); - return mtrr_state.fixed_ranges[idx]; + new_type = get_effective_type(type, cache_map[i].type); + old_type = cache_map[i].type; + + if (cache_map[i].fixed || new_type == old_type) { + /* Cut off start of new entry. */ + start = cache_map[i].end; + continue; + } + + /* Handle only overlapping part of region. */ + tmp = min(end, cache_map[i].end); + i += clr_map_range_at(start, tmp, i); + i -= add_map_entry_at(start, tmp, new_type, i); + start = tmp; } - /* 0xC0000 - 0xFFFFF */ - idx = 3 * 8; - idx += ((start - 0xC0000) >> 12); - return mtrr_state.fixed_ranges[idx]; + /* Add rest of region after last map entry (rest might be empty). */ + add_map_entry_at(start, end, type, i); } -/** - * mtrr_type_lookup_variable - look up memory type in MTRR variable entries - * - * Return Value: - * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched) - * - * Output Arguments: - * repeat - Set to 1 when [start:end] spanned across MTRR range and type - * returned corresponds only to [start:*partial_end]. Caller has - * to lookup again for [*partial_end:end]. - * - * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the - * region is fully covered by a single MTRR entry or the default - * type. +/* Add variable MTRRs to cache map. */ +static void map_add_var(void) +{ + u64 start, size; + unsigned int i; + u8 type; + + /* + * Add AMD TOP_MEM2 area. Can't be added in mtrr_build_map(), as it + * needs to be added again when rebuilding the map due to potentially + * having moved as a result of variable MTRRs for memory below 4GB. + */ + if (mtrr_tom2) { + add_map_entry(BIT_ULL(32), mtrr_tom2, MTRR_TYPE_WRBACK); + cache_map[cache_map_n - 1].fixed = 1; + } + + for (i = 0; i < num_var_ranges; i++) { + type = get_var_mtrr_state(i, &start, &size); + if (type != MTRR_TYPE_INVALID) + add_map_entry(start, start + size, type); + } +} + +/* + * Rebuild map by replacing variable entries. Needs to be called when MTRR + * registers are being changed after boot, as such changes could include + * removals of registers, which are complicated to handle without rebuild of + * the map. */ -static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, - int *repeat, u8 *uniform) +void generic_rebuild_map(void) { - int i; - u64 base, mask; - u8 prev_match, curr_match; + if (mtrr_if != &generic_mtrr_ops) + return; - *repeat = 0; - *uniform = 1; + cache_map_n = cache_map_fixed; - prev_match = MTRR_TYPE_INVALID; - for (i = 0; i < num_var_ranges; ++i) { - unsigned short start_state, end_state, inclusive; + map_add_var(); +} - if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11))) - continue; +static unsigned int __init get_cache_map_size(void) +{ + return cache_map_fixed + 2 * num_var_ranges + (mtrr_tom2 != 0); +} - base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) + - (mtrr_state.var_ranges[i].base_lo & PAGE_MASK); - mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) + - (mtrr_state.var_ranges[i].mask_lo & PAGE_MASK); - - start_state = ((start & mask) == (base & mask)); - end_state = ((end & mask) == (base & mask)); - inclusive = ((start < base) && (end > base)); - - if ((start_state != end_state) || inclusive) { - /* - * We have start:end spanning across an MTRR. - * We split the region into either - * - * - start_state:1 - * (start:mtrr_end)(mtrr_end:end) - * - end_state:1 - * (start:mtrr_start)(mtrr_start:end) - * - inclusive:1 - * (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end) - * - * depending on kind of overlap. - * - * Return the type of the first region and a pointer - * to the start of next region so that caller will be - * advised to lookup again after having adjusted start - * and end. - * - * Note: This way we handle overlaps with multiple - * entries and the default type properly. - */ - if (start_state) - *partial_end = base + get_mtrr_size(mask); - else - *partial_end = base; - - if (unlikely(*partial_end <= start)) { - WARN_ON(1); - *partial_end = start + PAGE_SIZE; - } +/* Build the cache_map containing the cache modes per memory range. */ +void __init mtrr_build_map(void) +{ + u64 start, end, size; + unsigned int i; + u8 type; - end = *partial_end - 1; /* end is inclusive */ - *repeat = 1; - *uniform = 0; + /* Add fixed MTRRs, optimize for adjacent entries with same type. */ + if (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED) { + /* + * Start with 64k size fixed entries, preset 1st one (hence the + * loop below is starting with index 1). + */ + start = 0; + end = size = 0x10000; + type = mtrr_state.fixed_ranges[0]; + + for (i = 1; i < MTRR_NUM_FIXED_RANGES; i++) { + /* 8 64k entries, then 16 16k ones, rest 4k. */ + if (i == 8 || i == 24) + size >>= 2; + + if (mtrr_state.fixed_ranges[i] != type) { + add_map_entry(start, end, type); + start = end; + type = mtrr_state.fixed_ranges[i]; + } + end += size; } + add_map_entry(start, end, type); + } - if ((start & mask) != (base & mask)) - continue; + /* Mark fixed, they take precedence. */ + for (i = 0; i < cache_map_n; i++) + cache_map[i].fixed = 1; + cache_map_fixed = cache_map_n; - curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; - if (prev_match == MTRR_TYPE_INVALID) { - prev_match = curr_match; - continue; + map_add_var(); + + pr_info("MTRR map: %u entries (%u fixed + %u variable; max %u), built from %u variable MTRRs\n", + cache_map_n, cache_map_fixed, cache_map_n - cache_map_fixed, + get_cache_map_size(), num_var_ranges + (mtrr_tom2 != 0)); + + if (mtrr_debug) { + for (i = 0; i < cache_map_n; i++) { + pr_info("%3u: %016llx-%016llx %s\n", i, + cache_map[i].start, cache_map[i].end - 1, + mtrr_attrib_to_str(cache_map[i].type)); } + } +} - *uniform = 0; - if (check_type_overlap(&prev_match, &curr_match)) - return curr_match; +/* Copy the cache_map from __initdata memory to dynamically allocated one. */ +void __init mtrr_copy_map(void) +{ + unsigned int new_size = get_cache_map_size(); + + if (!mtrr_state.enabled || !new_size) { + cache_map = NULL; + return; + } + + mutex_lock(&mtrr_mutex); + + cache_map = kcalloc(new_size, sizeof(*cache_map), GFP_KERNEL); + if (cache_map) { + memmove(cache_map, init_cache_map, + cache_map_n * sizeof(*cache_map)); + cache_map_size = new_size; + } else { + mtrr_state.enabled = 0; + pr_err("MTRRs disabled due to allocation failure for lookup map.\n"); + } + + mutex_unlock(&mtrr_mutex); +} + +/** + * mtrr_overwrite_state - set static MTRR state + * + * Used to set MTRR state via different means (e.g. with data obtained from + * a hypervisor). + * Is allowed only for special cases when running virtualized. Must be called + * from the x86_init.hyper.init_platform() hook. It can be called only once. + * The MTRR state can't be changed afterwards. To ensure that, X86_FEATURE_MTRR + * is cleared. + */ +void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var, + mtrr_type def_type) +{ + unsigned int i; + + /* Only allowed to be called once before mtrr_bp_init(). */ + if (WARN_ON_ONCE(mtrr_state_set)) + return; + + /* Only allowed when running virtualized. */ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return; + + /* + * Only allowed for special virtualization cases: + * - when running as Hyper-V, SEV-SNP guest using vTOM + * - when running as Xen PV guest + * - when running as SEV-SNP or TDX guest to avoid unnecessary + * VMM communication/Virtualization exceptions (#VC, #VE) + */ + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && + !hv_is_isolation_supported() && + !cpu_feature_enabled(X86_FEATURE_XENPV) && + !cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return; + + /* Disable MTRR in order to disable MTRR modifications. */ + setup_clear_cpu_cap(X86_FEATURE_MTRR); + + if (var) { + if (num_var > MTRR_MAX_VAR_RANGES) { + pr_warn("Trying to overwrite MTRR state with %u variable entries\n", + num_var); + num_var = MTRR_MAX_VAR_RANGES; + } + for (i = 0; i < num_var; i++) + mtrr_state.var_ranges[i] = var[i]; + num_var_ranges = num_var; } - if (prev_match != MTRR_TYPE_INVALID) - return prev_match; + mtrr_state.def_type = def_type; + mtrr_state.enabled |= MTRR_STATE_MTRR_ENABLED; - return mtrr_state.def_type; + mtrr_state_set = 1; +} + +static u8 type_merge(u8 type, u8 new_type, u8 *uniform) +{ + u8 effective_type; + + if (type == MTRR_TYPE_INVALID) + return new_type; + + effective_type = get_effective_type(type, new_type); + if (type != effective_type) + *uniform = 0; + + return effective_type; } /** @@ -248,66 +497,49 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, * MTRR_TYPE_INVALID - MTRR is disabled * * Output Argument: - * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the - * region is fully covered by a single MTRR entry or the default - * type. + * uniform - Set to 1 when the returned MTRR type is valid for the whole + * region, set to 0 else. */ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) { - u8 type, prev_type, is_uniform = 1, dummy; - int repeat; - u64 partial_end; + u8 type = MTRR_TYPE_INVALID; + unsigned int i; - /* Make end inclusive instead of exclusive */ - end--; + if (!mtrr_state_set) { + /* Uniformity is unknown. */ + *uniform = 0; + return MTRR_TYPE_UNCACHABLE; + } - if (!mtrr_state_set) - return MTRR_TYPE_INVALID; + *uniform = 1; if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) - return MTRR_TYPE_INVALID; + return MTRR_TYPE_UNCACHABLE; - /* - * Look up the fixed ranges first, which take priority over - * the variable ranges. - */ - if ((start < 0x100000) && - (mtrr_state.have_fixed) && - (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { - is_uniform = 0; - type = mtrr_type_lookup_fixed(start, end); - goto out; - } + for (i = 0; i < cache_map_n && start < end; i++) { + /* Region after current map entry? -> continue with next one. */ + if (start >= cache_map[i].end) + continue; - /* - * Look up the variable ranges. Look of multiple ranges matching - * this address and pick type as per MTRR precedence. - */ - type = mtrr_type_lookup_variable(start, end, &partial_end, - &repeat, &is_uniform); + /* Start of region not covered by current map entry? */ + if (start < cache_map[i].start) { + /* At least some part of region has default type. */ + type = type_merge(type, mtrr_state.def_type, uniform); + /* End of region not covered, too? -> lookup done. */ + if (end <= cache_map[i].start) + return type; + } - /* - * Common path is with repeat = 0. - * However, we can have cases where [start:end] spans across some - * MTRR ranges and/or the default type. Do repeated lookups for - * that case here. - */ - while (repeat) { - prev_type = type; - start = partial_end; - is_uniform = 0; - type = mtrr_type_lookup_variable(start, end, &partial_end, - &repeat, &dummy); + /* At least part of region covered by map entry. */ + type = type_merge(type, cache_map[i].type, uniform); - if (check_type_overlap(&prev_type, &type)) - goto out; + start = cache_map[i].end; } - if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2)) - type = MTRR_TYPE_WRBACK; + /* End of region past last entry in map? -> use default type. */ + if (start < end) + type = type_merge(type, mtrr_state.def_type, uniform); -out: - *uniform = is_uniform; return type; } @@ -363,8 +595,8 @@ static void __init print_fixed_last(void) if (!last_fixed_end) return; - pr_debug(" %05X-%05X %s\n", last_fixed_start, - last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); + pr_info(" %05X-%05X %s\n", last_fixed_start, + last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); last_fixed_end = 0; } @@ -402,10 +634,10 @@ static void __init print_mtrr_state(void) unsigned int i; int high_width; - pr_debug("MTRR default type: %s\n", - mtrr_attrib_to_str(mtrr_state.def_type)); + pr_info("MTRR default type: %s\n", + mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { - pr_debug("MTRR fixed ranges %sabled:\n", + pr_info("MTRR fixed ranges %sabled:\n", ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ? "en" : "dis"); @@ -420,26 +652,27 @@ static void __init print_mtrr_state(void) /* tail */ print_fixed_last(); } - pr_debug("MTRR variable ranges %sabled:\n", - mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis"); - high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; + pr_info("MTRR variable ranges %sabled:\n", + mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis"); + high_width = (boot_cpu_data.x86_phys_bits - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { - if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", - i, - high_width, - mtrr_state.var_ranges[i].base_hi, - mtrr_state.var_ranges[i].base_lo >> 12, - high_width, - mtrr_state.var_ranges[i].mask_hi, - mtrr_state.var_ranges[i].mask_lo >> 12, - mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + if (mtrr_state.var_ranges[i].mask_lo & MTRR_PHYSMASK_V) + pr_info(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & + MTRR_PHYSBASE_TYPE)); else - pr_debug(" %u disabled\n", i); + pr_info(" %u disabled\n", i); } if (mtrr_tom2) - pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); + pr_info("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); } /* Grab all of the MTRR state for this CPU into *state */ @@ -452,7 +685,7 @@ bool __init get_mtrr_state(void) vrs = mtrr_state.var_ranges; rdmsr(MSR_MTRRcap, lo, dummy); - mtrr_state.have_fixed = (lo >> 8) & 1; + mtrr_state.have_fixed = lo & MTRR_CAP_FIX; for (i = 0; i < num_var_ranges; i++) get_mtrr_var_range(i, &vrs[i]); @@ -460,8 +693,8 @@ bool __init get_mtrr_state(void) get_fixed_ranges(mtrr_state.fixed_ranges); rdmsr(MSR_MTRRdefType, lo, dummy); - mtrr_state.def_type = (lo & 0xff); - mtrr_state.enabled = (lo & 0xc00) >> 10; + mtrr_state.def_type = lo & MTRR_DEF_TYPE_TYPE; + mtrr_state.enabled = (lo & MTRR_DEF_TYPE_ENABLE) >> MTRR_STATE_SHIFT; if (amd_special_default_mtrr()) { unsigned low, high; @@ -474,7 +707,8 @@ bool __init get_mtrr_state(void) mtrr_tom2 &= 0xffffff800000ULL; } - print_mtrr_state(); + if (mtrr_debug) + print_mtrr_state(); mtrr_state_set = 1; @@ -574,7 +808,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); - if ((mask_lo & 0x800) == 0) { + if (!(mask_lo & MTRR_PHYSMASK_V)) { /* Invalid (i.e. free) range */ *base = 0; *size = 0; @@ -585,8 +819,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask: */ - tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; - mask = size_or_mask | tmp; + tmp = (u64)mask_hi << 32 | (mask_lo & PAGE_MASK); + mask = (u64)phys_hi_rsvd << 32 | tmp; /* Expand tmp with high bits to all 1s: */ hi = fls64(tmp); @@ -604,9 +838,9 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, * This works correctly if size is a power of two, i.e. a * contiguous range: */ - *size = -mask; + *size = -mask >> PAGE_SHIFT; *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; - *type = base_lo & 0xff; + *type = base_lo & MTRR_PHYSBASE_TYPE; out_put_cpu: put_cpu(); @@ -644,9 +878,8 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) bool changed = false; rdmsr(MTRRphysBase_MSR(index), lo, hi); - if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) - || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + if ((vr->base_lo & ~MTRR_PHYSBASE_RSVD) != (lo & ~MTRR_PHYSBASE_RSVD) + || (vr->base_hi & ~phys_hi_rsvd) != (hi & ~phys_hi_rsvd)) { mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); changed = true; @@ -654,9 +887,8 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), lo, hi); - if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) - || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + if ((vr->mask_lo & ~MTRR_PHYSMASK_RSVD) != (lo & ~MTRR_PHYSMASK_RSVD) + || (vr->mask_hi & ~phys_hi_rsvd) != (hi & ~phys_hi_rsvd)) { mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); changed = true; } @@ -691,11 +923,12 @@ static unsigned long set_mtrr_state(void) * Set_mtrr_restore restores the old value of MTRRdefType, * so to set it we fiddle with the saved value: */ - if ((deftype_lo & 0xff) != mtrr_state.def_type - || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { + if ((deftype_lo & MTRR_DEF_TYPE_TYPE) != mtrr_state.def_type || + ((deftype_lo & MTRR_DEF_TYPE_ENABLE) >> MTRR_STATE_SHIFT) != mtrr_state.enabled) { - deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | - (mtrr_state.enabled << 10); + deftype_lo = (deftype_lo & MTRR_DEF_TYPE_DISABLE) | + mtrr_state.def_type | + (mtrr_state.enabled << MTRR_STATE_SHIFT); change_mask |= MTRR_CHANGE_MASK_DEFTYPE; } @@ -708,7 +941,7 @@ void mtrr_disable(void) rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); + mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & MTRR_DEF_TYPE_DISABLE, deftype_hi); } void mtrr_enable(void) @@ -762,9 +995,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, memset(vr, 0, sizeof(struct mtrr_var_range)); } else { vr->base_lo = base << PAGE_SHIFT | type; - vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); - vr->mask_lo = -size << PAGE_SHIFT | 0x800; - vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + vr->base_hi = (base >> (32 - PAGE_SHIFT)) & ~phys_hi_rsvd; + vr->mask_lo = -size << PAGE_SHIFT | MTRR_PHYSMASK_V; + vr->mask_hi = (-size >> (32 - PAGE_SHIFT)) & ~phys_hi_rsvd; mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); @@ -783,7 +1016,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, * For Intel PPro stepping <= 7 * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF */ - if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && + if (mtrr_if == &generic_mtrr_ops && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_stepping <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { @@ -817,7 +1050,7 @@ static int generic_have_wrcomb(void) { unsigned long config, dummy; rdmsr(MSR_MTRRcap, config, dummy); - return config & (1 << 10); + return config & MTRR_CAP_WC; } int positive_have_wrcomb(void) diff --git a/arch/x86/kernel/cpu/mtrr/legacy.c b/arch/x86/kernel/cpu/mtrr/legacy.c new file mode 100644 index 000000000000..d25882fcf181 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/legacy.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> +#include <asm/cpufeature.h> +#include <asm/mtrr.h> +#include <asm/processor.h> +#include "mtrr.h" + +void mtrr_set_if(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + /* Pre-Athlon (K6) AMD CPU MTRRs */ + if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) + mtrr_if = &amd_mtrr_ops; + break; + case X86_VENDOR_CENTAUR: + if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) + mtrr_if = ¢aur_mtrr_ops; + break; + case X86_VENDOR_CYRIX: + if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) + mtrr_if = &cyrix_mtrr_ops; + break; + default: + break; + } +} + +/* + * The suspend/resume methods are only for CPUs without MTRR. CPUs using generic + * MTRR driver don't require this. + */ +struct mtrr_value { + mtrr_type ltype; + unsigned long lbase; + unsigned long lsize; +}; + +static struct mtrr_value *mtrr_value; + +static int mtrr_save(void) +{ + int i; + + if (!mtrr_value) + return -ENOMEM; + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &mtrr_value[i].lbase, + &mtrr_value[i].lsize, + &mtrr_value[i].ltype); + } + return 0; +} + +static void mtrr_restore(void) +{ + int i; + + for (i = 0; i < num_var_ranges; i++) { + if (mtrr_value[i].lsize) { + mtrr_if->set(i, mtrr_value[i].lbase, + mtrr_value[i].lsize, + mtrr_value[i].ltype); + } + } +} + +static struct syscore_ops mtrr_syscore_ops = { + .suspend = mtrr_save, + .resume = mtrr_restore, +}; + +void mtrr_register_syscore(void) +{ + mtrr_value = kcalloc(num_var_ranges, sizeof(*mtrr_value), GFP_KERNEL); + + /* + * The CPU has no MTRR and seems to not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * + * TBD: is there any system with such CPU which supports + * suspend/resume? If no, we should remove the code. + */ + register_syscore_ops(&mtrr_syscore_ops); +} diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 783f3210d582..767bf1c71aad 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -59,15 +59,9 @@ #define MTRR_TO_PHYS_WC_OFFSET 1000 u32 num_var_ranges; -static bool mtrr_enabled(void) -{ - return !!mtrr_if; -} unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; -static DEFINE_MUTEX(mtrr_mutex); - -u64 size_or_mask, size_and_mask; +DEFINE_MUTEX(mtrr_mutex); const struct mtrr_ops *mtrr_if; @@ -105,21 +99,6 @@ static int have_wrcomb(void) return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; } -/* This function returns the number of variable MTRRs */ -static void __init set_num_var_ranges(bool use_generic) -{ - unsigned long config = 0, dummy; - - if (use_generic) - rdmsr(MSR_MTRRcap, config, dummy); - else if (is_cpu(AMD) || is_cpu(HYGON)) - config = 2; - else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) - config = 8; - - num_var_ranges = config & 0xff; -} - static void __init init_table(void) { int i, max; @@ -194,20 +173,8 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) * Note that the mechanism is the same for UP systems, too; all the SMP stuff * becomes nops. */ -static void -set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) -{ - struct set_mtrr_data data = { .smp_reg = reg, - .smp_base = base, - .smp_size = size, - .smp_type = type - }; - - stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask); -} - -static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) +static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, + mtrr_type type) { struct set_mtrr_data data = { .smp_reg = reg, .smp_base = base, @@ -216,6 +183,8 @@ static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base, }; stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask); + + generic_rebuild_map(); } /** @@ -337,7 +306,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* Search for an empty MTRR */ i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { - set_mtrr_cpuslocked(i, base, size, type); + set_mtrr(i, base, size, type); if (likely(replace < 0)) { mtrr_usage_table[i] = 1; } else { @@ -345,7 +314,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (increment) mtrr_usage_table[i]++; if (unlikely(replace != i)) { - set_mtrr_cpuslocked(replace, 0, 0, 0); + set_mtrr(replace, 0, 0, 0); mtrr_usage_table[replace] = 0; } } @@ -363,7 +332,7 @@ static int mtrr_check(unsigned long base, unsigned long size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { pr_warn("size and base must be multiples of 4 kiB\n"); - pr_debug("size: 0x%lx base: 0x%lx\n", size, base); + Dprintk("size: 0x%lx base: 0x%lx\n", size, base); dump_stack(); return -1; } @@ -454,8 +423,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) } } if (reg < 0) { - pr_debug("no MTRR for %lx000,%lx000 found\n", - base, size); + Dprintk("no MTRR for %lx000,%lx000 found\n", base, size); goto out; } } @@ -473,7 +441,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) goto out; } if (--mtrr_usage_table[reg] < 1) - set_mtrr_cpuslocked(reg, 0, 0, 0); + set_mtrr(reg, 0, 0, 0); error = reg; out: mutex_unlock(&mtrr_mutex); @@ -574,136 +542,54 @@ int arch_phys_wc_index(int handle) } EXPORT_SYMBOL_GPL(arch_phys_wc_index); -/* The suspend/resume methods are only for CPU without MTRR. CPU using generic - * MTRR driver doesn't require this - */ -struct mtrr_value { - mtrr_type ltype; - unsigned long lbase; - unsigned long lsize; -}; - -static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; - -static int mtrr_save(void) -{ - int i; - - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &mtrr_value[i].lbase, - &mtrr_value[i].lsize, - &mtrr_value[i].ltype); - } - return 0; -} - -static void mtrr_restore(void) -{ - int i; - - for (i = 0; i < num_var_ranges; i++) { - if (mtrr_value[i].lsize) { - set_mtrr(i, mtrr_value[i].lbase, - mtrr_value[i].lsize, - mtrr_value[i].ltype); - } - } -} - - - -static struct syscore_ops mtrr_syscore_ops = { - .suspend = mtrr_save, - .resume = mtrr_restore, -}; - int __initdata changed_by_mtrr_cleanup; -#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1)) /** - * mtrr_bp_init - initialize mtrrs on the boot CPU + * mtrr_bp_init - initialize MTRRs on the boot CPU * * This needs to be called early; before any of the other CPUs are * initialized (i.e. before smp_init()). - * */ void __init mtrr_bp_init(void) { + bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR); const char *why = "(not available)"; - u32 phys_addr; - - phys_addr = 32; + unsigned long config, dummy; - if (boot_cpu_has(X86_FEATURE_MTRR)) { - mtrr_if = &generic_mtrr_ops; - size_or_mask = SIZE_OR_MASK_BITS(36); - size_and_mask = 0x00f00000; - phys_addr = 36; + phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32); + if (!generic_mtrrs && mtrr_state.enabled) { /* - * This is an AMD specific MSR, but we assume(hope?) that - * Intel will implement it too when they extend the address - * bus of the Xeon. + * Software overwrite of MTRR state, only for generic case. + * Note that X86_FEATURE_MTRR has been reset in this case. */ - if (cpuid_eax(0x80000000) >= 0x80000008) { - phys_addr = cpuid_eax(0x80000008) & 0xff; - /* CPUID workaround for Intel 0F33/0F34 CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 0xF && - boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_stepping == 0x3 || - boot_cpu_data.x86_stepping == 0x4)) - phys_addr = 36; - - size_or_mask = SIZE_OR_MASK_BITS(phys_addr); - size_and_mask = ~size_or_mask & 0xfffff00000ULL; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && - boot_cpu_data.x86 == 6) { - /* - * VIA C* family have Intel style MTRRs, - * but don't support PAE - */ - size_or_mask = SIZE_OR_MASK_BITS(32); - size_and_mask = 0; - phys_addr = 32; - } - } else { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) { - /* Pre-Athlon (K6) AMD CPU MTRRs */ - mtrr_if = &amd_mtrr_ops; - size_or_mask = SIZE_OR_MASK_BITS(32); - size_and_mask = 0; - } - break; - case X86_VENDOR_CENTAUR: - if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) { - mtrr_if = ¢aur_mtrr_ops; - size_or_mask = SIZE_OR_MASK_BITS(32); - size_and_mask = 0; - } - break; - case X86_VENDOR_CYRIX: - if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) { - mtrr_if = &cyrix_mtrr_ops; - size_or_mask = SIZE_OR_MASK_BITS(32); - size_and_mask = 0; - } - break; - default: - break; - } + init_table(); + mtrr_build_map(); + pr_info("MTRRs set to read-only\n"); + + return; } + if (generic_mtrrs) + mtrr_if = &generic_mtrr_ops; + else + mtrr_set_if(); + if (mtrr_enabled()) { - set_num_var_ranges(mtrr_if == &generic_mtrr_ops); + /* Get the number of variable MTRR ranges. */ + if (mtrr_if == &generic_mtrr_ops) + rdmsr(MSR_MTRRcap, config, dummy); + else + config = mtrr_if->var_regs; + num_var_ranges = config & MTRR_CAP_VCNT; + init_table(); if (mtrr_if == &generic_mtrr_ops) { /* BIOS may override */ if (get_mtrr_state()) { memory_caching_control |= CACHE_MTRR; - changed_by_mtrr_cleanup = mtrr_cleanup(phys_addr); + changed_by_mtrr_cleanup = mtrr_cleanup(); + mtrr_build_map(); } else { mtrr_if = NULL; why = "by BIOS"; @@ -730,8 +616,14 @@ void mtrr_save_state(void) smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1); } -static int __init mtrr_init_finialize(void) +static int __init mtrr_init_finalize(void) { + /* + * Map might exist if mtrr_overwrite_state() has been called or if + * mtrr_enabled() returns true. + */ + mtrr_copy_map(); + if (!mtrr_enabled()) return 0; @@ -741,16 +633,8 @@ static int __init mtrr_init_finialize(void) return 0; } - /* - * The CPU has no MTRR and seems to not support SMP. They have - * specific drivers, we use a tricky method to support - * suspend/resume for them. - * - * TBD: is there any system with such CPU which supports - * suspend/resume? If no, we should remove the code. - */ - register_syscore_ops(&mtrr_syscore_ops); + mtrr_register_syscore(); return 0; } -subsys_initcall(mtrr_init_finialize); +subsys_initcall(mtrr_init_finalize); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 02eb5871492d..5655f253d929 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -10,10 +10,13 @@ #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 +extern bool mtrr_debug; +#define Dprintk(x...) do { if (mtrr_debug) pr_info(x); } while (0) + extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { - u32 vendor; + u32 var_regs; void (*set)(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); void (*get)(unsigned int reg, unsigned long *base, @@ -51,18 +54,26 @@ void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); bool get_mtrr_state(void); -extern u64 size_or_mask, size_and_mask; extern const struct mtrr_ops *mtrr_if; - -#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) +extern struct mutex mtrr_mutex; extern unsigned int num_var_ranges; extern u64 mtrr_tom2; extern struct mtrr_state_type mtrr_state; +extern u32 phys_hi_rsvd; void mtrr_state_warn(void); const char *mtrr_attrib_to_str(int x); void mtrr_wrmsr(unsigned, unsigned, unsigned); +#ifdef CONFIG_X86_32 +void mtrr_set_if(void); +void mtrr_register_syscore(void); +#else +static inline void mtrr_set_if(void) { } +static inline void mtrr_register_syscore(void) { } +#endif +void mtrr_build_map(void); +void mtrr_copy_map(void); /* CPU specific mtrr_ops vectors. */ extern const struct mtrr_ops amd_mtrr_ops; @@ -70,4 +81,14 @@ extern const struct mtrr_ops cyrix_mtrr_ops; extern const struct mtrr_ops centaur_mtrr_ops; extern int changed_by_mtrr_cleanup; -extern int mtrr_cleanup(unsigned address_bits); +extern int mtrr_cleanup(void); + +/* + * Must be used by code which uses mtrr_if to call platform-specific + * MTRR manipulation functions. + */ +static inline bool mtrr_enabled(void) +{ + return !!mtrr_if; +} +void generic_rebuild_map(void); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6ad33f355861..725344048f85 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -726,11 +726,15 @@ unlock: static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) { struct task_struct *p, *t; + pid_t pid; rcu_read_lock(); for_each_process_thread(p, t) { - if (is_closid_match(t, r) || is_rmid_match(t, r)) - seq_printf(s, "%d\n", t->pid); + if (is_closid_match(t, r) || is_rmid_match(t, r)) { + pid = task_pid_vnr(t); + if (pid) + seq_printf(s, "%d\n", pid); + } } rcu_read_unlock(); } @@ -2301,6 +2305,26 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) } } +static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn) +{ + atomic_inc(&rdtgrp->waitcount); + kernfs_break_active_protection(kn); +} + +static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn) +{ + if (atomic_dec_and_test(&rdtgrp->waitcount) && + (rdtgrp->flags & RDT_DELETED)) { + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) + rdtgroup_pseudo_lock_remove(rdtgrp); + kernfs_unbreak_active_protection(kn); + rdtgroup_remove(rdtgrp); + } else { + kernfs_unbreak_active_protection(kn); + } +} + struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) { struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); @@ -2308,8 +2332,7 @@ struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) if (!rdtgrp) return NULL; - atomic_inc(&rdtgrp->waitcount); - kernfs_break_active_protection(kn); + rdtgroup_kn_get(rdtgrp, kn); mutex_lock(&rdtgroup_mutex); @@ -2328,17 +2351,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) return; mutex_unlock(&rdtgroup_mutex); - - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) - rdtgroup_pseudo_lock_remove(rdtgrp); - kernfs_unbreak_active_protection(kn); - rdtgroup_remove(rdtgrp); - } else { - kernfs_unbreak_active_protection(kn); - } + rdtgroup_kn_put(rdtgrp, kn); } static int mkdir_mondata_all(struct kernfs_node *parent_kn, @@ -3505,6 +3518,133 @@ out: return ret; } +/** + * mongrp_reparent() - replace parent CTRL_MON group of a MON group + * @rdtgrp: the MON group whose parent should be replaced + * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp + * @cpus: cpumask provided by the caller for use during this call + * + * Replaces the parent CTRL_MON group for a MON group, resulting in all member + * tasks' CLOSID immediately changing to that of the new parent group. + * Monitoring data for the group is unaffected by this operation. + */ +static void mongrp_reparent(struct rdtgroup *rdtgrp, + struct rdtgroup *new_prdtgrp, + cpumask_var_t cpus) +{ + struct rdtgroup *prdtgrp = rdtgrp->mon.parent; + + WARN_ON(rdtgrp->type != RDTMON_GROUP); + WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP); + + /* Nothing to do when simply renaming a MON group. */ + if (prdtgrp == new_prdtgrp) + return; + + WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); + list_move_tail(&rdtgrp->mon.crdtgrp_list, + &new_prdtgrp->mon.crdtgrp_list); + + rdtgrp->mon.parent = new_prdtgrp; + rdtgrp->closid = new_prdtgrp->closid; + + /* Propagate updated closid to all tasks in this group. */ + rdt_move_group_tasks(rdtgrp, rdtgrp, cpus); + + update_closid_rmid(cpus, NULL); +} + +static int rdtgroup_rename(struct kernfs_node *kn, + struct kernfs_node *new_parent, const char *new_name) +{ + struct rdtgroup *new_prdtgrp; + struct rdtgroup *rdtgrp; + cpumask_var_t tmpmask; + int ret; + + rdtgrp = kernfs_to_rdtgroup(kn); + new_prdtgrp = kernfs_to_rdtgroup(new_parent); + if (!rdtgrp || !new_prdtgrp) + return -ENOENT; + + /* Release both kernfs active_refs before obtaining rdtgroup mutex. */ + rdtgroup_kn_get(rdtgrp, kn); + rdtgroup_kn_get(new_prdtgrp, new_parent); + + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + /* + * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if + * either kernfs_node is a file. + */ + if (kernfs_type(kn) != KERNFS_DIR || + kernfs_type(new_parent) != KERNFS_DIR) { + rdt_last_cmd_puts("Source and destination must be directories"); + ret = -EPERM; + goto out; + } + + if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) { + ret = -ENOENT; + goto out; + } + + if (rdtgrp->type != RDTMON_GROUP || !kn->parent || + !is_mon_groups(kn->parent, kn->name)) { + rdt_last_cmd_puts("Source must be a MON group\n"); + ret = -EPERM; + goto out; + } + + if (!is_mon_groups(new_parent, new_name)) { + rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n"); + ret = -EPERM; + goto out; + } + + /* + * If the MON group is monitoring CPUs, the CPUs must be assigned to the + * current parent CTRL_MON group and therefore cannot be assigned to + * the new parent, making the move illegal. + */ + if (!cpumask_empty(&rdtgrp->cpu_mask) && + rdtgrp->mon.parent != new_prdtgrp) { + rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n"); + ret = -EPERM; + goto out; + } + + /* + * Allocate the cpumask for use in mongrp_reparent() to avoid the + * possibility of failing to allocate it after kernfs_rename() has + * succeeded. + */ + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) { + ret = -ENOMEM; + goto out; + } + + /* + * Perform all input validation and allocations needed to ensure + * mongrp_reparent() will succeed before calling kernfs_rename(), + * otherwise it would be necessary to revert this call if + * mongrp_reparent() failed. + */ + ret = kernfs_rename(kn, new_parent, new_name); + if (!ret) + mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask); + + free_cpumask_var(tmpmask); + +out: + mutex_unlock(&rdtgroup_mutex); + rdtgroup_kn_put(rdtgrp, kn); + rdtgroup_kn_put(new_prdtgrp, new_parent); + return ret; +} + static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) { if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) @@ -3522,6 +3662,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { .mkdir = rdtgroup_mkdir, .rmdir = rdtgroup_rmdir, + .rename = rdtgroup_rename, .show_options = rdtgroup_show_options, }; diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 2a0e90fe2abc..91fa70e51004 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -755,6 +755,7 @@ static void sgx_mmu_notifier_release(struct mmu_notifier *mn, { struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier); struct sgx_encl_mm *tmp = NULL; + bool found = false; /* * The enclave itself can remove encl_mm. Note, objects can't be moved @@ -764,12 +765,13 @@ static void sgx_mmu_notifier_release(struct mmu_notifier *mn, list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) { if (tmp == encl_mm) { list_del_rcu(&encl_mm->list); + found = true; break; } } spin_unlock(&encl_mm->encl->mm_lock); - if (tmp == encl_mm) { + if (found) { synchronize_srcu(&encl_mm->encl->srcu); mmu_notifier_put(mn); } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 5e868b62a7c4..0270925fe013 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -79,7 +79,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c) * initial apic id, which also represents 32-bit extended x2apic id. */ c->initial_apicid = edx; - smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); #endif return 0; } @@ -109,7 +109,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c) */ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); c->initial_apicid = edx; - core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 3b58d8703094..6eaf9a6bc02f 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -9,6 +9,7 @@ #include <asm/processor.h> #include <asm/desc.h> #include <asm/traps.h> +#include <asm/doublefault.h> #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0bf6779187dd..f18ca44c904b 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -195,7 +195,6 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, printk("%sCall Trace:\n", log_lvl); unwind_start(&state, task, regs, stack); - stack = stack ? : get_stack_pointer(task, regs); regs = unwind_get_entry_regs(&state, &partial); /* @@ -214,9 +213,13 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - hardirq stack * - entry stack */ - for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + for (stack = stack ?: get_stack_pointer(task, regs); + stack; + stack = stack_info.next_sp) { const char *stack_name; + stack = PTR_ALIGN(stack, sizeof(long)); + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { /* * We weren't on a valid stack. It's possible that diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index 9fcfa5c4dad7..af5cbdd9bd29 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -57,7 +57,7 @@ static inline void fpregs_restore_userregs(void) struct fpu *fpu = ¤t->thread.fpu; int cpu = smp_processor_id(); - if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_IO_WORKER))) + if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER))) return; if (!fpregs_state_valid(fpu, cpu)) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index caf33486dc5e..1015af1ae562 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -426,7 +426,7 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask) this_cpu_write(in_kernel_fpu, true); - if (!(current->flags & (PF_KTHREAD | PF_IO_WORKER)) && + if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); save_fpregs_to_fpstate(¤t->thread.fpu); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 851eb13edc01..998a08f17e33 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -53,7 +53,7 @@ void fpu__init_cpu(void) fpu__init_cpu_xstate(); } -static bool fpu__probe_without_cpuid(void) +static bool __init fpu__probe_without_cpuid(void) { unsigned long cr0; u16 fsw, fcw; @@ -71,7 +71,7 @@ static bool fpu__probe_without_cpuid(void) return fsw == 0 && (fcw & 0x103f) == 0x003f; } -static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +static void __init fpu__init_system_early_generic(void) { if (!boot_cpu_has(X86_FEATURE_CPUID) && !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { @@ -211,10 +211,10 @@ static void __init fpu__init_system_xstate_size_legacy(void) * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: */ -void __init fpu__init_system(struct cpuinfo_x86 *c) +void __init fpu__init_system(void) { fpstate_reset(¤t->thread.fpu); - fpu__init_system_early_generic(c); + fpu__init_system_early_generic(); /* * The FPU has to be operational for some of the diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5e7ead52cfdb..01e8f34daf22 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -525,9 +525,6 @@ static void *addr_from_call(void *ptr) return ptr + CALL_INSN_SIZE + call.disp; } -void prepare_ftrace_return(unsigned long ip, unsigned long *parent, - unsigned long frame_pointer); - /* * If the ops->trampoline was not allocated, then it probably * has a static trampoline func, or is the ftrace caller itself. diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 10c27b4261eb..246a609f889b 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -69,6 +69,7 @@ asmlinkage __visible void __init __noreturn i386_start_kernel(void) * to the first kernel PMD. Note the upper half of each PMD or PTE are * always zero at this stage. */ +void __init mk_early_pgtbl_32(void); void __init mk_early_pgtbl_32(void) { #ifdef __pa diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 67c8ed99144b..c9318993f959 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -138,20 +138,6 @@ SYM_CODE_START(startup_32) jmp .Ldefault_entry SYM_CODE_END(startup_32) -#ifdef CONFIG_HOTPLUG_CPU -/* - * Boot CPU0 entry point. It's called from play_dead(). Everything has been set - * up already except stack. We just set up stack here. Then call - * start_secondary(). - */ -SYM_FUNC_START(start_cpu0) - movl initial_stack, %ecx - movl %ecx, %esp - call *(initial_code) -1: jmp 1b -SYM_FUNC_END(start_cpu0) -#endif - /* * Non-boot CPU entry point; entered from trampoline.S * We can't lgdt here, because lgdt itself uses a data segment, but diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a5df3e994f04..c5b9289837dc 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -24,7 +24,9 @@ #include "../entry/calling.h" #include <asm/export.h> #include <asm/nospec-branch.h> +#include <asm/apicdef.h> #include <asm/fixmap.h> +#include <asm/smp.h> /* * We are not able to switch in one step to the final KERNEL ADDRESS SPACE @@ -77,6 +79,15 @@ SYM_CODE_START_NOALIGN(startup_64) call startup_64_setup_env popq %rsi + /* Now switch to __KERNEL_CS so IRET works reliably */ + pushq $__KERNEL_CS + leaq .Lon_kernel_cs(%rip), %rax + pushq %rax + lretq + +.Lon_kernel_cs: + UNWIND_HINT_END_OF_STACK + #ifdef CONFIG_AMD_MEM_ENCRYPT /* * Activate SEV/SME memory encryption if supported/enabled. This needs to @@ -90,15 +101,6 @@ SYM_CODE_START_NOALIGN(startup_64) popq %rsi #endif - /* Now switch to __KERNEL_CS so IRET works reliably */ - pushq $__KERNEL_CS - leaq .Lon_kernel_cs(%rip), %rax - pushq %rax - lretq - -.Lon_kernel_cs: - UNWIND_HINT_END_OF_STACK - /* Sanitize CPU configuration */ call verify_cpu @@ -234,8 +236,67 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) ANNOTATE_NOENDBR // above #ifdef CONFIG_SMP + /* + * For parallel boot, the APIC ID is read from the APIC, and then + * used to look up the CPU number. For booting a single CPU, the + * CPU number is encoded in smpboot_control. + * + * Bit 31 STARTUP_READ_APICID (Read APICID from APIC) + * Bit 0-23 CPU# if STARTUP_xx flags are not set + */ movl smpboot_control(%rip), %ecx + testl $STARTUP_READ_APICID, %ecx + jnz .Lread_apicid + /* + * No control bit set, single CPU bringup. CPU number is provided + * in bit 0-23. This is also the boot CPU case (CPU number 0). + */ + andl $(~STARTUP_PARALLEL_MASK), %ecx + jmp .Lsetup_cpu +.Lread_apicid: + /* Check whether X2APIC mode is already enabled */ + mov $MSR_IA32_APICBASE, %ecx + rdmsr + testl $X2APIC_ENABLE, %eax + jnz .Lread_apicid_msr + + /* Read the APIC ID from the fix-mapped MMIO space. */ + movq apic_mmio_base(%rip), %rcx + addq $APIC_ID, %rcx + movl (%rcx), %eax + shr $24, %eax + jmp .Llookup_AP + +.Lread_apicid_msr: + mov $APIC_X2APIC_ID_MSR, %ecx + rdmsr + +.Llookup_AP: + /* EAX contains the APIC ID of the current CPU */ + xorq %rcx, %rcx + leaq cpuid_to_apicid(%rip), %rbx + +.Lfind_cpunr: + cmpl (%rbx,%rcx,4), %eax + jz .Lsetup_cpu + inc %ecx +#ifdef CONFIG_FORCE_NR_CPUS + cmpl $NR_CPUS, %ecx +#else + cmpl nr_cpu_ids(%rip), %ecx +#endif + jb .Lfind_cpunr + + /* APIC ID not found in the table. Drop the trampoline lock and bail. */ + movq trampoline_lock(%rip), %rax + movl $0, (%rax) + +1: cli + hlt + jmp 1b + +.Lsetup_cpu: /* Get the per cpu offset for the given CPU# which is in ECX */ movq __per_cpu_offset(,%rcx,8), %rdx #else @@ -252,6 +313,16 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) movq TASK_threadsp(%rax), %rsp /* + * Now that this CPU is running on its own stack, drop the realmode + * protection. For the boot CPU the pointer is NULL! + */ + movq trampoline_lock(%rip), %rax + testq %rax, %rax + jz .Lsetup_gdt + movl $0, (%rax) + +.Lsetup_gdt: + /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace * addresses where we're currently running on. We have to do that here @@ -375,13 +446,13 @@ SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" #include "sev_verify_cbit.S" -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT) /* - * Boot CPU0 entry point. It's called from play_dead(). Everything has been set - * up already except stack. We just set up stack here. Then call - * start_secondary() via .Ljump_to_C_code. + * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for + * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot + * unplug. Everything is set up already except the stack. */ -SYM_CODE_START(start_cpu0) +SYM_CODE_START(soft_restart_cpu) ANNOTATE_NOENDBR UNWIND_HINT_END_OF_STACK @@ -390,7 +461,7 @@ SYM_CODE_START(start_cpu0) movq TASK_threadsp(%rcx), %rsp jmp .Ljump_to_C_code -SYM_CODE_END(start_cpu0) +SYM_CODE_END(soft_restart_cpu) #endif #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -433,6 +504,8 @@ SYM_DATA(initial_code, .quad x86_64_start_kernel) #ifdef CONFIG_AMD_MEM_ENCRYPT SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb) #endif + +SYM_DATA(trampoline_lock, .quad 0); __FINITDATA __INIT diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 766ffe3ba313..9f668d2f3d11 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -211,6 +211,13 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; #endif +#ifdef CONFIG_X86_HV_CALLBACK_VECTOR + sum += irq_stats(cpu)->irq_hv_callback_count; +#endif +#if IS_ENABLED(CONFIG_HYPERV) + sum += irq_stats(cpu)->irq_hv_reenlightenment_count; + sum += irq_stats(cpu)->hyperv_stimer0_count; +#endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); sum += per_cpu(mce_poll_count, cpu); diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 670eb08b972a..ee4fe8cdb857 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -165,32 +165,19 @@ int arch_asym_cpu_priority(int cpu) /** * sched_set_itmt_core_prio() - Set CPU priority based on ITMT - * @prio: Priority of cpu core - * @core_cpu: The cpu number associated with the core + * @prio: Priority of @cpu + * @cpu: The CPU number * * The pstate driver will find out the max boost frequency * and call this function to set a priority proportional - * to the max boost frequency. CPU with higher boost + * to the max boost frequency. CPUs with higher boost * frequency will receive higher priority. * * No need to rebuild sched domain after updating * the CPU priorities. The sched domains have no * dependency on CPU priorities. */ -void sched_set_itmt_core_prio(int prio, int core_cpu) +void sched_set_itmt_core_prio(int prio, int cpu) { - int cpu, i = 1; - - for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { - int smt_prio; - - /* - * Ensure that the siblings are moved to the end - * of the priority chain and only used when - * all other high priority cpus are out of capacity. - */ - smt_prio = prio * smp_num_siblings / (i * i); - per_cpu(sched_core_priority, cpu) = smt_prio; - i++; - } + per_cpu(sched_core_priority, cpu) = prio; } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 0f35d44c56fe..fb8f52149be9 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -71,7 +71,7 @@ static int kvm_set_wallclock(const struct timespec64 *now) return -ENODEV; } -static noinstr u64 kvm_clock_read(void) +static u64 kvm_clock_read(void) { u64 ret; @@ -88,7 +88,7 @@ static u64 kvm_clock_get_cycles(struct clocksource *cs) static noinstr u64 kvm_sched_clock_read(void) { - return kvm_clock_read() - kvm_sched_clock_offset; + return pvclock_clocksource_read_nowd(this_cpu_pvti()) - kvm_sched_clock_offset; } static inline void kvm_sched_clock_init(bool stable) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 776f4b1e395b..a0c551846b35 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -496,7 +496,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi) */ sev_es_nmi_complete(); if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) - arch_atomic_long_inc(&nsp->idt_calls); + raw_atomic_long_inc(&nsp->idt_calls); if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) return; diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b348a672f71d..b525fe6d6657 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/init.h> +#include <linux/pnp.h> #include <asm/setup.h> #include <asm/bios_ebda.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index dac41a0072ea..ff9b80a0e3e3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -759,15 +759,26 @@ bool xen_set_default_idle(void) } #endif +struct cpumask cpus_stop_mask; + void __noreturn stop_this_cpu(void *dummy) { + struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info); + unsigned int cpu = smp_processor_id(); + local_irq_disable(); + /* - * Remove this CPU: + * Remove this CPU from the online mask and disable it + * unconditionally. This might be redundant in case that the reboot + * vector was handled late and stop_other_cpus() sent an NMI. + * + * According to SDM and APM NMIs can be accepted even after soft + * disabling the local APIC. */ - set_cpu_online(smp_processor_id(), false); + set_cpu_online(cpu, false); disable_local_APIC(); - mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); + mcheck_cpu_clear(c); /* * Use wbinvd on processors that support SME. This provides support @@ -781,8 +792,17 @@ void __noreturn stop_this_cpu(void *dummy) * Test the CPUID bit directly because the machine might've cleared * X86_FEATURE_SME due to cmdline options. */ - if (cpuid_eax(0x8000001f) & BIT(0)) + if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) native_wbinvd(); + + /* + * This brings a cache line back and dirties it, but + * native_stop_other_cpus() will overwrite cpus_stop_mask after it + * observed that all CPUs reported stop. This write will invalidate + * the related cache line on this CPU. + */ + cpumask_clear_cpu(cpu, &cpus_stop_mask); + for (;;) { /* * Use native_halt() so that memory contents don't change diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 56acf53a782a..b3f81379c2fc 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -101,11 +101,11 @@ u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd) * updating at the same time, and one of them could be slightly behind, * making the assumption that last_value always go forward fail to hold. */ - last = arch_atomic64_read(&last_value); + last = raw_atomic64_read(&last_value); do { if (ret <= last) return last; - } while (!arch_atomic64_try_cmpxchg(&last_value, &last, ret)); + } while (!raw_atomic64_try_cmpxchg(&last_value, &last, ret)); return ret; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 16babff771bd..fd975a4a5200 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -796,7 +796,6 @@ static void __init early_reserve_memory(void) memblock_x86_reserve_range_setup_data(); - reserve_ibft_region(); reserve_bios_regions(); trim_snb_memory(); } @@ -1032,11 +1031,14 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); + reserve_ibft_region(); dmi_setup(); /* * VMware detection requires dmi to be available, so this * needs to be done after dmi_setup(), for the boot CPU. + * For some guest types (Xen PV, SEV-SNP, TDX) it is required to be + * called before cache_bp_init() for setting up MTRR state. */ init_hypervisor_platform(); diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 3a5b0c9c4fcc..2eabccde94fb 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -12,6 +12,9 @@ #ifndef __BOOT_COMPRESSED #define error(v) pr_err(v) #define has_cpuflag(f) boot_cpu_has(f) +#else +#undef WARN +#define WARN(condition, format...) (!!(condition)) #endif /* I/O parameters for CPUID-related helpers */ @@ -991,3 +994,103 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) cpuid_ext_range_max = fn->eax; } } + +static void pvalidate_pages(struct snp_psc_desc *desc) +{ + struct psc_entry *e; + unsigned long vaddr; + unsigned int size; + unsigned int i; + bool validate; + int rc; + + for (i = 0; i <= desc->hdr.end_entry; i++) { + e = &desc->entries[i]; + + vaddr = (unsigned long)pfn_to_kaddr(e->gfn); + size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; + validate = e->operation == SNP_PAGE_STATE_PRIVATE; + + rc = pvalidate(vaddr, size, validate); + if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) { + unsigned long vaddr_end = vaddr + PMD_SIZE; + + for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) { + rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); + if (rc) + break; + } + } + + if (rc) { + WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + } + } +} + +static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) +{ + int cur_entry, end_entry, ret = 0; + struct snp_psc_desc *data; + struct es_em_ctxt ctxt; + + vc_ghcb_invalidate(ghcb); + + /* Copy the input desc into GHCB shared buffer */ + data = (struct snp_psc_desc *)ghcb->shared_buffer; + memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); + + /* + * As per the GHCB specification, the hypervisor can resume the guest + * before processing all the entries. Check whether all the entries + * are processed. If not, then keep retrying. Note, the hypervisor + * will update the data memory directly to indicate the status, so + * reference the data->hdr everywhere. + * + * The strategy here is to wait for the hypervisor to change the page + * state in the RMP table before guest accesses the memory pages. If the + * page state change was not successful, then later memory access will + * result in a crash. + */ + cur_entry = data->hdr.cur_entry; + end_entry = data->hdr.end_entry; + + while (data->hdr.cur_entry <= data->hdr.end_entry) { + ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); + + /* This will advance the shared buffer data points to. */ + ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0); + + /* + * Page State Change VMGEXIT can pass error code through + * exit_info_2. + */ + if (WARN(ret || ghcb->save.sw_exit_info_2, + "SNP: PSC failed ret=%d exit_info_2=%llx\n", + ret, ghcb->save.sw_exit_info_2)) { + ret = 1; + goto out; + } + + /* Verify that reserved bit is not set */ + if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { + ret = 1; + goto out; + } + + /* + * Sanity check that entry processing is not going backwards. + * This will happen only if hypervisor is tricking us. + */ + if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, +"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", + end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { + ret = 1; + goto out; + } + } + +out: + return ret; +} diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b031244d6d2d..1ee7bed453de 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -113,13 +113,23 @@ struct ghcb_state { }; static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); -DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); - static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); struct sev_config { __u64 debug : 1, - __reserved : 63; + + /* + * A flag used by __set_pages_state() that indicates when the + * per-CPU GHCB has been created and registered and thus can be + * used by the BSP instead of the early boot GHCB. + * + * For APs, the per-CPU GHCB is created before they are started + * and registered upon startup, so this flag can be used globally + * for the BSP and APs. + */ + ghcbs_initialized : 1, + + __reserved : 62; }; static struct sev_config sev_cfg __read_mostly; @@ -645,32 +655,26 @@ static u64 __init get_jump_table_addr(void) return ret; } -static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate) -{ - unsigned long vaddr_end; - int rc; - - vaddr = vaddr & PAGE_MASK; - vaddr_end = vaddr + (npages << PAGE_SHIFT); - - while (vaddr < vaddr_end) { - rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); - if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); - - vaddr = vaddr + PAGE_SIZE; - } -} - -static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op) +static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, + unsigned long npages, enum psc_op op) { unsigned long paddr_end; u64 val; + int ret; + + vaddr = vaddr & PAGE_MASK; paddr = paddr & PAGE_MASK; paddr_end = paddr + (npages << PAGE_SHIFT); while (paddr < paddr_end) { + if (op == SNP_PAGE_STATE_SHARED) { + /* Page validation must be rescinded before changing to shared */ + ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false); + if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret)) + goto e_term; + } + /* * Use the MSR protocol because this function can be called before * the GHCB is established. @@ -691,7 +695,15 @@ static void __init early_set_pages_state(unsigned long paddr, unsigned int npage paddr, GHCB_MSR_PSC_RESP_VAL(val))) goto e_term; - paddr = paddr + PAGE_SIZE; + if (op == SNP_PAGE_STATE_PRIVATE) { + /* Page validation must be performed after changing to private */ + ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true); + if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret)) + goto e_term; + } + + vaddr += PAGE_SIZE; + paddr += PAGE_SIZE; } return; @@ -701,7 +713,7 @@ e_term: } void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned int npages) + unsigned long npages) { /* * This can be invoked in early boot while running identity mapped, so @@ -716,14 +728,11 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd * Ask the hypervisor to mark the memory pages as private in the RMP * table. */ - early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE); - - /* Validate the memory pages after they've been added in the RMP table. */ - pvalidate_pages(vaddr, npages, true); + early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE); } void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned int npages) + unsigned long npages) { /* * This can be invoked in early boot while running identity mapped, so @@ -734,11 +743,8 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) return; - /* Invalidate the memory pages before they are marked shared in the RMP table. */ - pvalidate_pages(vaddr, npages, false); - /* Ask hypervisor to mark the memory pages shared in the RMP table. */ - early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED); + early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); } void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) @@ -756,96 +762,16 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op WARN(1, "invalid memory op %d\n", op); } -static int vmgexit_psc(struct snp_psc_desc *desc) +static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, + unsigned long vaddr_end, int op) { - int cur_entry, end_entry, ret = 0; - struct snp_psc_desc *data; struct ghcb_state state; - struct es_em_ctxt ctxt; - unsigned long flags; - struct ghcb *ghcb; - - /* - * __sev_get_ghcb() needs to run with IRQs disabled because it is using - * a per-CPU GHCB. - */ - local_irq_save(flags); - - ghcb = __sev_get_ghcb(&state); - if (!ghcb) { - ret = 1; - goto out_unlock; - } - - /* Copy the input desc into GHCB shared buffer */ - data = (struct snp_psc_desc *)ghcb->shared_buffer; - memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); - - /* - * As per the GHCB specification, the hypervisor can resume the guest - * before processing all the entries. Check whether all the entries - * are processed. If not, then keep retrying. Note, the hypervisor - * will update the data memory directly to indicate the status, so - * reference the data->hdr everywhere. - * - * The strategy here is to wait for the hypervisor to change the page - * state in the RMP table before guest accesses the memory pages. If the - * page state change was not successful, then later memory access will - * result in a crash. - */ - cur_entry = data->hdr.cur_entry; - end_entry = data->hdr.end_entry; - - while (data->hdr.cur_entry <= data->hdr.end_entry) { - ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); - - /* This will advance the shared buffer data points to. */ - ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0); - - /* - * Page State Change VMGEXIT can pass error code through - * exit_info_2. - */ - if (WARN(ret || ghcb->save.sw_exit_info_2, - "SNP: PSC failed ret=%d exit_info_2=%llx\n", - ret, ghcb->save.sw_exit_info_2)) { - ret = 1; - goto out; - } - - /* Verify that reserved bit is not set */ - if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { - ret = 1; - goto out; - } - - /* - * Sanity check that entry processing is not going backwards. - * This will happen only if hypervisor is tricking us. - */ - if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, -"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", - end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { - ret = 1; - goto out; - } - } - -out: - __sev_put_ghcb(&state); - -out_unlock: - local_irq_restore(flags); - - return ret; -} - -static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, - unsigned long vaddr_end, int op) -{ + bool use_large_entry; struct psc_hdr *hdr; struct psc_entry *e; + unsigned long flags; unsigned long pfn; + struct ghcb *ghcb; int i; hdr = &data->hdr; @@ -854,74 +780,104 @@ static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, memset(data, 0, sizeof(*data)); i = 0; - while (vaddr < vaddr_end) { - if (is_vmalloc_addr((void *)vaddr)) + while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) { + hdr->end_entry = i; + + if (is_vmalloc_addr((void *)vaddr)) { pfn = vmalloc_to_pfn((void *)vaddr); - else + use_large_entry = false; + } else { pfn = __pa(vaddr) >> PAGE_SHIFT; + use_large_entry = true; + } e->gfn = pfn; e->operation = op; - hdr->end_entry = i; - /* - * Current SNP implementation doesn't keep track of the RMP page - * size so use 4K for simplicity. - */ - e->pagesize = RMP_PG_SIZE_4K; + if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) && + (vaddr_end - vaddr) >= PMD_SIZE) { + e->pagesize = RMP_PG_SIZE_2M; + vaddr += PMD_SIZE; + } else { + e->pagesize = RMP_PG_SIZE_4K; + vaddr += PAGE_SIZE; + } - vaddr = vaddr + PAGE_SIZE; e++; i++; } - if (vmgexit_psc(data)) + /* Page validation must be rescinded before changing to shared */ + if (op == SNP_PAGE_STATE_SHARED) + pvalidate_pages(data); + + local_irq_save(flags); + + if (sev_cfg.ghcbs_initialized) + ghcb = __sev_get_ghcb(&state); + else + ghcb = boot_ghcb; + + /* Invoke the hypervisor to perform the page state changes */ + if (!ghcb || vmgexit_psc(ghcb, data)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + if (sev_cfg.ghcbs_initialized) + __sev_put_ghcb(&state); + + local_irq_restore(flags); + + /* Page validation must be performed after changing to private */ + if (op == SNP_PAGE_STATE_PRIVATE) + pvalidate_pages(data); + + return vaddr; } -static void set_pages_state(unsigned long vaddr, unsigned int npages, int op) +static void set_pages_state(unsigned long vaddr, unsigned long npages, int op) { - unsigned long vaddr_end, next_vaddr; - struct snp_psc_desc *desc; + struct snp_psc_desc desc; + unsigned long vaddr_end; - desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT); - if (!desc) - panic("SNP: failed to allocate memory for PSC descriptor\n"); + /* Use the MSR protocol when a GHCB is not available. */ + if (!boot_ghcb) + return early_set_pages_state(vaddr, __pa(vaddr), npages, op); vaddr = vaddr & PAGE_MASK; vaddr_end = vaddr + (npages << PAGE_SHIFT); - while (vaddr < vaddr_end) { - /* Calculate the last vaddr that fits in one struct snp_psc_desc. */ - next_vaddr = min_t(unsigned long, vaddr_end, - (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr); - - __set_pages_state(desc, vaddr, next_vaddr, op); - - vaddr = next_vaddr; - } - - kfree(desc); + while (vaddr < vaddr_end) + vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op); } -void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) +void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return; - pvalidate_pages(vaddr, npages, false); - set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED); } -void snp_set_memory_private(unsigned long vaddr, unsigned int npages) +void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return; set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); +} + +void snp_accept_memory(phys_addr_t start, phys_addr_t end) +{ + unsigned long vaddr; + unsigned int npages; + + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return; + + vaddr = (unsigned long)__va(start); + npages = (end - start) >> PAGE_SHIFT; - pvalidate_pages(vaddr, npages, true); + set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); } static int snp_set_vmsa(void *va, bool vmsa) @@ -1267,6 +1223,8 @@ void setup_ghcb(void) if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) snp_register_per_cpu_ghcb(); + sev_cfg.ghcbs_initialized = true; + return; } @@ -1328,7 +1286,7 @@ static void sev_es_play_dead(void) * If we get here, the VCPU was woken up again. Jump to CPU * startup code to get it back online. */ - start_cpu0(); + soft_restart_cpu(); } #else /* CONFIG_HOTPLUG_CPU */ #define sev_es_play_dead native_play_dead @@ -1395,9 +1353,6 @@ void __init sev_es_init_vc_handling(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } - /* Enable SEV-ES special handling */ - static_branch_enable(&sev_es_enable_key); - /* Initialize per-cpu GHCB pages */ for_each_possible_cpu(cpu) { alloc_runtime_data(cpu); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 004cb30b7419..cfeec3ee877e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -182,7 +182,7 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, static unsigned long __ro_after_init max_frame_size; static unsigned int __ro_after_init fpu_default_state_size; -void __init init_sigframe_size(void) +static int __init init_sigframe_size(void) { fpu_default_state_size = fpu__get_fpstate_size(); @@ -194,7 +194,9 @@ void __init init_sigframe_size(void) max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT); pr_info("max sigframe size: %lu\n", max_frame_size); + return 0; } +early_initcall(init_sigframe_size); unsigned long get_sigframe_size(void) { diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 375b33ecafa2..7eb18ca7bd45 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -21,12 +21,14 @@ #include <linux/interrupt.h> #include <linux/cpu.h> #include <linux/gfp.h> +#include <linux/kexec.h> #include <asm/mtrr.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/proto.h> #include <asm/apic.h> +#include <asm/cpu.h> #include <asm/idtentry.h> #include <asm/nmi.h> #include <asm/mce.h> @@ -129,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) } /* - * this function calls the 'stop' function on all other CPUs in the system. + * Disable virtualization, APIC etc. and park the CPU in a HLT loop */ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { @@ -146,61 +148,96 @@ static int register_stop_handler(void) static void native_stop_other_cpus(int wait) { - unsigned long flags; - unsigned long timeout; + unsigned int cpu = smp_processor_id(); + unsigned long flags, timeout; if (reboot_force) return; - /* - * Use an own vector here because smp_call_function - * does lots of things not suitable in a panic situation. - */ + /* Only proceed if this is the first CPU to reach this code */ + if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1) + return; + + /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */ + if (kexec_in_progress) + smp_kick_mwait_play_dead(); /* - * We start by using the REBOOT_VECTOR irq. - * The irq is treated as a sync point to allow critical - * regions of code on other cpus to release their spin locks - * and re-enable irqs. Jumping straight to an NMI might - * accidentally cause deadlocks with further shutdown/panic - * code. By syncing, we give the cpus up to one second to - * finish their work before we force them off with the NMI. + * 1) Send an IPI on the reboot vector to all other CPUs. + * + * The other CPUs should react on it after leaving critical + * sections and re-enabling interrupts. They might still hold + * locks, but there is nothing which can be done about that. + * + * 2) Wait for all other CPUs to report that they reached the + * HLT loop in stop_this_cpu() + * + * 3) If the system uses INIT/STARTUP for CPU bringup, then + * send all present CPUs an INIT vector, which brings them + * completely out of the way. + * + * 4) If #3 is not possible and #2 timed out send an NMI to the + * CPUs which did not yet report + * + * 5) Wait for all other CPUs to report that they reached the + * HLT loop in stop_this_cpu() + * + * #4 can obviously race against a CPU reaching the HLT loop late. + * That CPU will have reported already and the "have all CPUs + * reached HLT" condition will be true despite the fact that the + * other CPU is still handling the NMI. Again, there is no + * protection against that as "disabled" APICs still respond to + * NMIs. */ - if (num_online_cpus() > 1) { - /* did someone beat us here? */ - if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) - return; - - /* sync above data before sending IRQ */ - wmb(); + cpumask_copy(&cpus_stop_mask, cpu_online_mask); + cpumask_clear_cpu(cpu, &cpus_stop_mask); + if (!cpumask_empty(&cpus_stop_mask)) { apic_send_IPI_allbutself(REBOOT_VECTOR); /* * Don't wait longer than a second for IPI completion. The * wait request is not checked here because that would - * prevent an NMI shutdown attempt in case that not all + * prevent an NMI/INIT shutdown in case that not all * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && timeout--) + while (!cpumask_empty(&cpus_stop_mask) && timeout--) udelay(1); } - /* if the REBOOT_VECTOR didn't work, try with the NMI */ - if (num_online_cpus() > 1) { + /* + * Park all other CPUs in INIT including "offline" CPUs, if + * possible. That's a safe place where they can't resume execution + * of HLT and then execute the HLT loop from overwritten text or + * page tables. + * + * The only downside is a broadcast MCE, but up to the point where + * the kexec() kernel brought all APs online again an MCE will just + * make HLT resume and handle the MCE. The machine crashes and burns + * due to overwritten text, page tables and data. So there is a + * choice between fire and frying pan. The result is pretty much + * the same. Chose frying pan until x86 provides a sane mechanism + * to park a CPU. + */ + if (smp_park_other_cpus_in_init()) + goto done; + + /* + * If park with INIT was not possible and the REBOOT_VECTOR didn't + * take all secondary CPUs offline, try with the NMI. + */ + if (!cpumask_empty(&cpus_stop_mask)) { /* * If NMI IPI is enabled, try to register the stop handler * and send the IPI. In any case try to wait for the other * CPUs to stop. */ if (!smp_no_nmi_ipi && !register_stop_handler()) { - /* Sync above data before sending IRQ */ - wmb(); - pr_emerg("Shutting down cpus with NMI\n"); - apic_send_IPI_allbutself(NMI_VECTOR); + for_each_cpu(cpu, &cpus_stop_mask) + apic->send_IPI(cpu, NMI_VECTOR); } /* * Don't wait longer than 10 ms if the caller didn't @@ -208,14 +245,21 @@ static void native_stop_other_cpus(int wait) * one or more CPUs do not reach shutdown state. */ timeout = USEC_PER_MSEC * 10; - while (num_online_cpus() > 1 && (wait || timeout--)) + while (!cpumask_empty(&cpus_stop_mask) && (wait || timeout--)) udelay(1); } +done: local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); local_irq_restore(flags); + + /* + * Ensure that the cpus_stop_mask cache lines are invalidated on + * the other CPUs. See comment vs. SME in stop_this_cpu(). + */ + cpumask_clear(&cpus_stop_mask); } /* @@ -268,8 +312,7 @@ struct smp_ops smp_ops = { #endif .smp_send_reschedule = native_smp_send_reschedule, - .cpu_up = native_cpu_up, - .cpu_die = native_cpu_die, + .kick_ap_alive = native_kick_ap, .cpu_disable = native_cpu_disable, .play_dead = native_play_dead, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 352f0ce1ece4..ed2d51960a7d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -53,10 +53,13 @@ #include <linux/tboot.h> #include <linux/gfp.h> #include <linux/cpuidle.h> +#include <linux/kexec.h> #include <linux/numa.h> #include <linux/pgtable.h> #include <linux/overflow.h> #include <linux/stackprotector.h> +#include <linux/cpuhotplug.h> +#include <linux/mc146818rtc.h> #include <asm/acpi.h> #include <asm/cacheinfo.h> @@ -74,7 +77,7 @@ #include <asm/fpu/api.h> #include <asm/setup.h> #include <asm/uv/uv.h> -#include <linux/mc146818rtc.h> +#include <asm/microcode.h> #include <asm/i8259.h> #include <asm/misc.h> #include <asm/qspinlock.h> @@ -101,6 +104,26 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* CPUs which are the primary SMT threads */ +struct cpumask __cpu_primary_thread_mask __read_mostly; + +/* Representing CPUs for which sibling maps can be computed */ +static cpumask_var_t cpu_sibling_setup_mask; + +struct mwait_cpu_dead { + unsigned int control; + unsigned int status; +}; + +#define CPUDEAD_MWAIT_WAIT 0xDEADBEEF +#define CPUDEAD_MWAIT_KEXEC_HLT 0x4A17DEAD + +/* + * Cache line aligned data for mwait_play_dead(). Separate on purpose so + * that it's unlikely to be touched by other CPUs. + */ +static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); + /* Logical package management. We might want to allocate that dynamically */ unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); @@ -121,7 +144,6 @@ int arch_update_cpu_topology(void) return retval; } - static unsigned int smpboot_warm_reset_vector_count; static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) @@ -154,66 +176,63 @@ static inline void smpboot_restore_warm_reset_vector(void) } -/* - * Report back to the Boot Processor during boot time or to the caller processor - * during CPU online. - */ -static void smp_callin(void) +/* Run the next set of setup steps for the upcoming CPU */ +static void ap_starting(void) { - int cpuid; + int cpuid = smp_processor_id(); - /* - * If waken up by an INIT in an 82489DX configuration - * cpu_callout_mask guarantees we don't get here before - * an INIT_deassert IPI reaches our local APIC, so it is - * now safe to touch our local APIC. - */ - cpuid = smp_processor_id(); + /* Mop up eventual mwait_play_dead() wreckage */ + this_cpu_write(mwait_cpu_dead.status, 0); + this_cpu_write(mwait_cpu_dead.control, 0); /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) + * If woken up by an INIT in an 82489DX configuration the alive + * synchronization guarantees that the CPU does not reach this + * point before an INIT_deassert IPI reaches the local APIC, so it + * is now safe to touch the local APIC. + * + * Set up this CPU, first the APIC, which is probably redundant on + * most boards. */ apic_ap_setup(); - /* - * Save our processor parameters. Note: this information - * is needed for clock calibration. - */ + /* Save the processor parameters. */ smp_store_cpu_info(cpuid); /* * The topology information must be up to date before - * calibrate_delay() and notify_cpu_starting(). + * notify_cpu_starting(). */ - set_cpu_sibling_map(raw_smp_processor_id()); + set_cpu_sibling_map(cpuid); ap_init_aperfmperf(); - /* - * Get our bogomips. - * Update loops_per_jiffy in cpu_data. Previous call to - * smp_store_cpu_info() stored a value that is close but not as - * accurate as the value just calculated. - */ - calibrate_delay(); - cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); wmb(); + /* + * This runs the AP through all the cpuhp states to its target + * state CPUHP_ONLINE. + */ notify_cpu_starting(cpuid); +} +static void ap_calibrate_delay(void) +{ /* - * Allow the master to continue. + * Calibrate the delay loop and update loops_per_jiffy in cpu_data. + * smp_store_cpu_info() stored a value that is close but not as + * accurate as the value just calculated. + * + * As this is invoked after the TSC synchronization check, + * calibrate_delay_is_known() will skip the calibration routine + * when TSC is synchronized across sockets. */ - cpumask_set_cpu(cpuid, cpu_callin_mask); + calibrate_delay(); + cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy; } -static int cpu0_logical_apicid; -static int enable_start_cpu0; /* * Activate a secondary processor. */ @@ -226,24 +245,63 @@ static void notrace start_secondary(void *unused) */ cr4_init(); -#ifdef CONFIG_X86_32 - /* switch away from the initial page table */ - load_cr3(swapper_pg_dir); - __flush_tlb_all(); -#endif - cpu_init_secondary(); + /* + * 32-bit specific. 64-bit reaches this code with the correct page + * table established. Yet another historical divergence. + */ + if (IS_ENABLED(CONFIG_X86_32)) { + /* switch away from the initial page table */ + load_cr3(swapper_pg_dir); + __flush_tlb_all(); + } + + cpu_init_exception_handling(); + + /* + * 32-bit systems load the microcode from the ASM startup code for + * historical reasons. + * + * On 64-bit systems load it before reaching the AP alive + * synchronization point below so it is not part of the full per + * CPU serialized bringup part when "parallel" bringup is enabled. + * + * That's even safe when hyperthreading is enabled in the CPU as + * the core code starts the primary threads first and leaves the + * secondary threads waiting for SIPI. Loading microcode on + * physical cores concurrently is a safe operation. + * + * This covers both the Intel specific issue that concurrent + * microcode loading on SMT siblings must be prohibited and the + * vendor independent issue`that microcode loading which changes + * CPUID, MSRs etc. must be strictly serialized to maintain + * software state correctness. + */ + if (IS_ENABLED(CONFIG_X86_64)) + load_ucode_ap(); + + /* + * Synchronization point with the hotplug core. Sets this CPUs + * synchronization state to ALIVE and spin-waits for the control CPU to + * release this CPU for further bringup. + */ + cpuhp_ap_sync_alive(); + + cpu_init(); + fpu__init_cpu(); rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); - smp_callin(); - enable_start_cpu0 = 0; + ap_starting(); + + /* Check TSC synchronization with the control CPU. */ + check_tsc_sync_target(); - /* otherwise gcc will move up smp_processor_id before the cpu_init */ - barrier(); /* - * Check TSC synchronization with the boot CPU: + * Calibrate the delay loop after the TSC synchronization check. + * This allows to skip the calibration when TSC is synchronized + * across sockets. */ - check_tsc_sync_target(); + ap_calibrate_delay(); speculative_store_bypass_ht_init(); @@ -257,7 +315,6 @@ static void notrace start_secondary(void *unused) set_cpu_online(smp_processor_id(), true); lapic_online(); unlock_vector_lock(); - cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); /* enable local interrupts */ @@ -270,15 +327,6 @@ static void notrace start_secondary(void *unused) } /** - * topology_is_primary_thread - Check whether CPU is the primary SMT thread - * @cpu: CPU to check - */ -bool topology_is_primary_thread(unsigned int cpu) -{ - return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); -} - -/** * topology_smt_supported - Check whether SMT is supported by the CPUs */ bool topology_smt_supported(void) @@ -288,6 +336,7 @@ bool topology_smt_supported(void) /** * topology_phys_to_logical_pkg - Map a physical package id to a logical + * @phys_pkg: The physical package id to map * * Returns logical package id or -1 if not found */ @@ -304,15 +353,17 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) return -1; } EXPORT_SYMBOL(topology_phys_to_logical_pkg); + /** * topology_phys_to_logical_die - Map a physical die id to logical + * @die_id: The physical die id to map + * @cur_cpu: The CPU for which the mapping is done * * Returns logical die id or -1 if not found */ -int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) +static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) { - int cpu; - int proc_id = cpu_data(cur_cpu).phys_proc_id; + int cpu, proc_id = cpu_data(cur_cpu).phys_proc_id; for_each_possible_cpu(cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -323,7 +374,6 @@ int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) } return -1; } -EXPORT_SYMBOL(topology_phys_to_logical_die); /** * topology_update_package_map - Update the physical to logical package map @@ -398,7 +448,7 @@ void smp_store_cpu_info(int id) c->cpu_index = id; /* * During boot time, CPU0 has this setup already. Save the info when - * bringing up AP or offlined CPU0. + * bringing up an AP. */ identify_secondary_cpu(c); c->initialized = true; @@ -552,7 +602,7 @@ static int x86_core_flags(void) #ifdef CONFIG_SCHED_SMT static int x86_smt_flags(void) { - return cpu_smt_flags() | x86_sched_itmt_flags(); + return cpu_smt_flags(); } #endif #ifdef CONFIG_SCHED_CLUSTER @@ -563,50 +613,57 @@ static int x86_cluster_flags(void) #endif #endif -static struct sched_domain_topology_level x86_numa_in_package_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, -#endif -#ifdef CONFIG_SCHED_CLUSTER - { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) }, -#endif -#ifdef CONFIG_SCHED_MC - { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, -#endif - { NULL, }, -}; +/* + * Set if a package/die has multiple NUMA nodes inside. + * AMD Magny-Cours, Intel Cluster-on-Die, and Intel + * Sub-NUMA Clustering have this. + */ +static bool x86_has_numa_in_package; -static struct sched_domain_topology_level x86_hybrid_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, -#endif -#ifdef CONFIG_SCHED_MC - { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, -#endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; +static struct sched_domain_topology_level x86_topology[6]; + +static void __init build_sched_topology(void) +{ + int i = 0; -static struct sched_domain_topology_level x86_topology[] = { #ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) + }; #endif #ifdef CONFIG_SCHED_CLUSTER - { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) }, + /* + * For now, skip the cluster domain on Hybrid. + */ + if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) + }; + } #endif #ifdef CONFIG_SCHED_MC - { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) + }; #endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; + /* + * When there is NUMA topology inside the package skip the DIE domain + * since the NUMA domains will auto-magically create the right spanning + * domains based on the SLIT. + */ + if (!x86_has_numa_in_package) { + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_cpu_mask, SD_INIT_NAME(DIE) + }; + } -/* - * Set if a package/die has multiple NUMA nodes inside. - * AMD Magny-Cours, Intel Cluster-on-Die, and Intel - * Sub-NUMA Clustering have this. - */ -static bool x86_has_numa_in_package; + /* + * There must be one trailing NULL entry left. + */ + BUG_ON(i >= ARRAY_SIZE(x86_topology)-1); + + set_sched_topology(x86_topology); +} void set_cpu_sibling_map(int cpu) { @@ -706,9 +763,9 @@ static void impress_friends(void) * Allow the user to impress friends. */ pr_debug("Before bogomips\n"); - for_each_possible_cpu(cpu) - if (cpumask_test_cpu(cpu, cpu_callout_mask)) - bogosum += cpu_data(cpu).loops_per_jiffy; + for_each_online_cpu(cpu) + bogosum += cpu_data(cpu).loops_per_jiffy; + pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", num_online_cpus(), bogosum/(500000/HZ), @@ -795,86 +852,42 @@ static void __init smp_quirk_init_udelay(void) } /* - * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal - * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this - * won't ... remember to clear down the APIC, etc later. + * Wake up AP by INIT, INIT, STARTUP sequence. */ -int -wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) +static void send_init_sequence(int phys_apicid) { - u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; - unsigned long send_status, accept_status = 0; - int maxlvt; + int maxlvt = lapic_get_maxlvt(); - /* Target chip */ - /* Boot on the stack */ - /* Kick the second */ - apic_icr_write(APIC_DM_NMI | dm, apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); + /* Be paranoid about clearing APIC errors. */ if (APIC_INTEGRATED(boot_cpu_apic_version)) { - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + /* Due to the Pentium erratum 3AP. */ + if (maxlvt > 3) apic_write(APIC_ESR, 0); - accept_status = (apic_read(APIC_ESR) & 0xEF); + apic_read(APIC_ESR); } - pr_debug("NMI sent\n"); - if (send_status) - pr_err("APIC never delivered???\n"); - if (accept_status) - pr_err("APIC delivery error (%lx)\n", accept_status); + /* Assert INIT on the target CPU */ + apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, phys_apicid); + safe_apic_wait_icr_idle(); - return (send_status | accept_status); + udelay(init_udelay); + + /* Deassert INIT on the target CPU */ + apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); + safe_apic_wait_icr_idle(); } -static int -wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) +/* + * Wake up AP by INIT, INIT, STARTUP sequence. + */ +static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; - int maxlvt, num_starts, j; + int num_starts, j, maxlvt; + preempt_disable(); maxlvt = lapic_get_maxlvt(); - - /* - * Be paranoid about clearing APIC errors. - */ - if (APIC_INTEGRATED(boot_cpu_apic_version)) { - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - pr_debug("Asserting INIT\n"); - - /* - * Turn INIT on target chip - */ - /* - * Send IPI - */ - apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, - phys_apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - udelay(init_udelay); - - pr_debug("Deasserting INIT\n"); - - /* Target chip */ - /* Send IPI */ - apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); + send_init_sequence(phys_apicid); mb(); @@ -945,15 +958,16 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) if (accept_status) pr_err("APIC delivery error (%lx)\n", accept_status); + preempt_enable(); return (send_status | accept_status); } /* reduce the number of lines printed when booting a large cpu count system */ static void announce_cpu(int cpu, int apicid) { + static int width, node_width, first = 1; static int current_node = NUMA_NO_NODE; int node = early_cpu_to_node(cpu); - static int width, node_width; if (!width) width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */ @@ -961,10 +975,10 @@ static void announce_cpu(int cpu, int apicid) if (!node_width) node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */ - if (cpu == 1) - printk(KERN_INFO "x86: Booting SMP configuration:\n"); - if (system_state < SYSTEM_RUNNING) { + if (first) + pr_info("x86: Booting SMP configuration:\n"); + if (node != current_node) { if (current_node > (-1)) pr_cont("\n"); @@ -975,77 +989,16 @@ static void announce_cpu(int cpu, int apicid) } /* Add padding for the BSP */ - if (cpu == 1) + if (first) pr_cont("%*s", width + 1, " "); + first = 0; pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu); - } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", node, cpu, apicid); } -static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs) -{ - int cpu; - - cpu = smp_processor_id(); - if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0) - return NMI_HANDLED; - - return NMI_DONE; -} - -/* - * Wake up AP by INIT, INIT, STARTUP sequence. - * - * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS - * boot-strap code which is not a desired behavior for waking up BSP. To - * void the boot-strap code, wake up CPU0 by NMI instead. - * - * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined - * (i.e. physically hot removed and then hot added), NMI won't wake it up. - * We'll change this code in the future to wake up hard offlined CPU0 if - * real platform and request are available. - */ -static int -wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, - int *cpu0_nmi_registered) -{ - int id; - int boot_error; - - preempt_disable(); - - /* - * Wake up AP by INIT, INIT, STARTUP sequence. - */ - if (cpu) { - boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); - goto out; - } - - /* - * Wake up BSP by nmi. - * - * Register a NMI handler to help wake up CPU0. - */ - boot_error = register_nmi_handler(NMI_LOCAL, - wakeup_cpu0_nmi, 0, "wake_cpu0"); - - if (!boot_error) { - enable_start_cpu0 = 1; - *cpu0_nmi_registered = 1; - id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid; - boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); - } - -out: - preempt_enable(); - - return boot_error; -} - int common_cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -1071,17 +1024,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from + * Returns zero if startup was successfully sent, else error code from * ->wakeup_secondary_cpu. */ -static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, - int *cpu0_nmi_registered) +static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) { - /* start_ip had better be page-aligned! */ unsigned long start_ip = real_mode_header->trampoline_start; - - unsigned long boot_error = 0; - unsigned long timeout; + int ret; #ifdef CONFIG_X86_64 /* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */ @@ -1094,7 +1043,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, if (IS_ENABLED(CONFIG_X86_32)) { early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); initial_stack = idle->thread.sp; - } else { + } else if (!(smpboot_control & STARTUP_PARALLEL_MASK)) { smpboot_control = cpu; } @@ -1108,7 +1057,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, * This grunge runs the startup process for * the targeted processor. */ - if (x86_platform.legacy.warm_reset) { pr_debug("Setting warm reset code and vector.\n"); @@ -1123,13 +1071,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, } } - /* - * AP might wait on cpu_callout_mask in cpu_init() with - * cpu_initialized_mask set if previous attempt to online - * it timed-out. Clear cpu_initialized_mask so that after - * INIT/SIPI it could start with a clean state. - */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); smp_mb(); /* @@ -1137,66 +1078,25 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, * - Use a method from the APIC driver if one defined, with wakeup * straight to 64-bit mode preferred over wakeup to RM. * Otherwise, - * - Use an INIT boot APIC message for APs or NMI for BSP. + * - Use an INIT boot APIC message */ if (apic->wakeup_secondary_cpu_64) - boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip); + ret = apic->wakeup_secondary_cpu_64(apicid, start_ip); else if (apic->wakeup_secondary_cpu) - boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); + ret = apic->wakeup_secondary_cpu(apicid, start_ip); else - boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, - cpu0_nmi_registered); - - if (!boot_error) { - /* - * Wait 10s total for first sign of life from AP - */ - boot_error = -1; - timeout = jiffies + 10*HZ; - while (time_before(jiffies, timeout)) { - if (cpumask_test_cpu(cpu, cpu_initialized_mask)) { - /* - * Tell AP to proceed with initialization - */ - cpumask_set_cpu(cpu, cpu_callout_mask); - boot_error = 0; - break; - } - schedule(); - } - } - - if (!boot_error) { - /* - * Wait till AP completes initial initialization - */ - while (!cpumask_test_cpu(cpu, cpu_callin_mask)) { - /* - * Allow other tasks to run while we wait for the - * AP to come online. This also gives a chance - * for the MTRR work(triggered by the AP coming online) - * to be completed in the stop machine context. - */ - schedule(); - } - } + ret = wakeup_secondary_cpu_via_init(apicid, start_ip); - if (x86_platform.legacy.warm_reset) { - /* - * Cleanup possible dangling ends... - */ - smpboot_restore_warm_reset_vector(); - } - - return boot_error; + /* If the wakeup mechanism failed, cleanup the warm reset vector */ + if (ret) + arch_cpuhp_cleanup_kick_cpu(cpu); + return ret; } -int native_cpu_up(unsigned int cpu, struct task_struct *tidle) +int native_kick_ap(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); - int cpu0_nmi_registered = 0; - unsigned long flags; - int err, ret = 0; + int err; lockdep_assert_irqs_enabled(); @@ -1210,24 +1110,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) } /* - * Already booted CPU? - */ - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - pr_debug("do_boot_cpu %d Already started\n", cpu); - return -ENOSYS; - } - - /* * Save current MTRR state in case it was changed since early boot * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: */ mtrr_save_state(); - /* x86 CPUs take themselves offline, so delayed offline is OK. */ - err = cpu_check_up_prepare(cpu); - if (err && err != -EBUSY) - return err; - /* the FPU context is blank, nobody can own it */ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; @@ -1235,41 +1122,44 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) if (err) return err; - err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); - if (err) { + err = do_boot_cpu(apicid, cpu, tidle); + if (err) pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); - ret = -EIO; - goto unreg_nmi; - } - /* - * Check TSC synchronization with the AP (keep irqs disabled - * while doing so): - */ - local_irq_save(flags); - check_tsc_sync_source(cpu); - local_irq_restore(flags); + return err; +} - while (!cpu_online(cpu)) { - cpu_relax(); - touch_nmi_watchdog(); - } +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle) +{ + return smp_ops.kick_ap_alive(cpu, tidle); +} -unreg_nmi: - /* - * Clean up the nmi handler. Do this after the callin and callout sync - * to avoid impact of possible long unregister time. - */ - if (cpu0_nmi_registered) - unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); +void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) +{ + /* Cleanup possible dangling ends... */ + if (smp_ops.kick_ap_alive == native_kick_ap && x86_platform.legacy.warm_reset) + smpboot_restore_warm_reset_vector(); +} - return ret; +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ + if (smp_ops.cleanup_dead_cpu) + smp_ops.cleanup_dead_cpu(cpu); + + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); +} + +void arch_cpuhp_sync_state_poll(void) +{ + if (smp_ops.poll_sync_state) + smp_ops.poll_sync_state(); } /** - * arch_disable_smp_support() - disables SMP support for x86 at runtime + * arch_disable_smp_support() - Disables SMP support for x86 at boottime */ -void arch_disable_smp_support(void) +void __init arch_disable_smp_support(void) { disable_ioapic_support(); } @@ -1361,14 +1251,6 @@ static void __init smp_cpu_index_default(void) } } -static void __init smp_get_logical_apicid(void) -{ - if (x2apic_mode) - cpu0_logical_apicid = apic_read(APIC_LDR); - else - cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - void __init smp_prepare_cpus_common(void) { unsigned int i; @@ -1379,7 +1261,6 @@ void __init smp_prepare_cpus_common(void) * Setup boot CPU information */ smp_store_boot_cpu_info(); /* Final full version of the data */ - cpumask_copy(cpu_callin_mask, cpumask_of(0)); mb(); for_each_possible_cpu(i) { @@ -1390,18 +1271,24 @@ void __init smp_prepare_cpus_common(void) zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL); } - /* - * Set 'default' x86 topology, this matches default_topology() in that - * it has NUMA nodes as a topology level. See also - * native_smp_cpus_done(). - * - * Must be done before set_cpus_sibling_map() is ran. - */ - set_sched_topology(x86_topology); - set_cpu_sibling_map(0); } +#ifdef CONFIG_X86_64 +/* Establish whether parallel bringup can be supported. */ +bool __init arch_cpuhp_init_parallel_bringup(void) +{ + if (!x86_cpuinit.parallel_bringup) { + pr_info("Parallel CPU startup disabled by the platform\n"); + return false; + } + + smpboot_control = STARTUP_READ_APICID; + pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control); + return true; +} +#endif + /* * Prepare for SMP bootup. * @max_cpus: configured maximum number of CPUs, It is a legacy parameter @@ -1431,8 +1318,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) /* Setup local timer */ x86_init.timers.setup_percpu_clockev(); - smp_get_logical_apicid(); - pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); @@ -1455,6 +1340,25 @@ void arch_thaw_secondary_cpus_end(void) cache_aps_init(); } +bool smp_park_other_cpus_in_init(void) +{ + unsigned int cpu, this_cpu = smp_processor_id(); + unsigned int apicid; + + if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu) + return false; + + for_each_present_cpu(cpu) { + if (cpu == this_cpu) + continue; + apicid = apic->cpu_present_to_apicid(cpu); + if (apicid == BAD_APICID) + continue; + send_init_sequence(apicid); + } + return true; +} + /* * Early setup to make printk work. */ @@ -1466,9 +1370,6 @@ void __init native_smp_prepare_boot_cpu(void) if (!IS_ENABLED(CONFIG_SMP)) switch_gdt_and_percpu_base(me); - /* already set me in cpu_online_mask in boot_cpu_init() */ - cpumask_set_cpu(me, cpu_callout_mask); - cpu_set_state_online(me); native_pv_lock_init(); } @@ -1490,13 +1391,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) pr_debug("Boot done\n"); calculate_max_logical_packages(); - - /* XXX for now assume numa-in-package and hybrid don't overlap */ - if (x86_has_numa_in_package) - set_sched_topology(x86_numa_in_package_topology); - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) - set_sched_topology(x86_hybrid_topology); - + build_sched_topology(); nmi_selftest(); impress_friends(); cache_aps_init(); @@ -1592,6 +1487,12 @@ __init void prefill_possible_map(void) set_cpu_possible(i, true); } +/* correctly size the local cpu masks */ +void __init setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + #ifdef CONFIG_HOTPLUG_CPU /* Recompute SMT state for all CPUs on offline */ @@ -1650,10 +1551,6 @@ static void remove_siblinginfo(int cpu) static void remove_cpu_from_maps(int cpu) { set_cpu_online(cpu, false); - cpumask_clear_cpu(cpu, cpu_callout_mask); - cpumask_clear_cpu(cpu, cpu_callin_mask); - /* was set by cpu_init() */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); numa_remove_cpu(cpu); } @@ -1704,64 +1601,27 @@ int native_cpu_disable(void) return 0; } -int common_cpu_die(unsigned int cpu) -{ - int ret = 0; - - /* We don't do anything here: idle task is faking death itself. */ - - /* They ack this in play_dead() by setting CPU_DEAD */ - if (cpu_wait_death(cpu, 5)) { - if (system_state == SYSTEM_RUNNING) - pr_info("CPU %u is now offline\n", cpu); - } else { - pr_err("CPU %u didn't die...\n", cpu); - ret = -1; - } - - return ret; -} - -void native_cpu_die(unsigned int cpu) -{ - common_cpu_die(cpu); -} - void play_dead_common(void) { idle_task_exit(); - /* Ack it */ - (void)cpu_report_death(); - + cpuhp_ap_report_dead(); /* * With physical CPU hotplug, we should halt the cpu */ local_irq_disable(); } -/** - * cond_wakeup_cpu0 - Wake up CPU0 if needed. - * - * If NMI wants to wake up CPU0, start CPU0. - */ -void cond_wakeup_cpu0(void) -{ - if (smp_processor_id() == 0 && enable_start_cpu0) - start_cpu0(); -} -EXPORT_SYMBOL_GPL(cond_wakeup_cpu0); - /* * We need to flush the caches before going to sleep, lest we have * dirty data in our caches when we come back up. */ static inline void mwait_play_dead(void) { + struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); unsigned int eax, ebx, ecx, edx; unsigned int highest_cstate = 0; unsigned int highest_subcstate = 0; - void *mwait_ptr; int i; if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || @@ -1796,12 +1656,9 @@ static inline void mwait_play_dead(void) (highest_subcstate - 1); } - /* - * This should be a memory location in a cache line which is - * unlikely to be touched by other processors. The actual - * content is immaterial as it is not actually modified in any way. - */ - mwait_ptr = ¤t_thread_info()->flags; + /* Set up state for the kexec() hack below */ + md->status = CPUDEAD_MWAIT_WAIT; + md->control = CPUDEAD_MWAIT_WAIT; wbinvd(); @@ -1814,13 +1671,58 @@ static inline void mwait_play_dead(void) * case where we return around the loop. */ mb(); - clflush(mwait_ptr); + clflush(md); mb(); - __monitor(mwait_ptr, 0, 0); + __monitor(md, 0, 0); mb(); __mwait(eax, 0); - cond_wakeup_cpu0(); + if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) { + /* + * Kexec is about to happen. Don't go back into mwait() as + * the kexec kernel might overwrite text and data including + * page tables and stack. So mwait() would resume when the + * monitor cache line is written to and then the CPU goes + * south due to overwritten text, page tables and stack. + * + * Note: This does _NOT_ protect against a stray MCE, NMI, + * SMI. They will resume execution at the instruction + * following the HLT instruction and run into the problem + * which this is trying to prevent. + */ + WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT); + while(1) + native_halt(); + } + } +} + +/* + * Kick all "offline" CPUs out of mwait on kexec(). See comment in + * mwait_play_dead(). + */ +void smp_kick_mwait_play_dead(void) +{ + u32 newstate = CPUDEAD_MWAIT_KEXEC_HLT; + struct mwait_cpu_dead *md; + unsigned int cpu, i; + + for_each_cpu_andnot(cpu, cpu_present_mask, cpu_online_mask) { + md = per_cpu_ptr(&mwait_cpu_dead, cpu); + + /* Does it sit in mwait_play_dead() ? */ + if (READ_ONCE(md->status) != CPUDEAD_MWAIT_WAIT) + continue; + + /* Wait up to 5ms */ + for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { + /* Bring it out of mwait */ + WRITE_ONCE(md->control, newstate); + udelay(5); + } + + if (READ_ONCE(md->status) != newstate) + pr_err_once("CPU%u is stuck in mwait_play_dead()\n", cpu); } } @@ -1829,11 +1731,8 @@ void __noreturn hlt_play_dead(void) if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); - while (1) { + while (1) native_halt(); - - cond_wakeup_cpu0(); - } } void native_play_dead(void) @@ -1852,12 +1751,6 @@ int native_cpu_disable(void) return -ENOSYS; } -void native_cpu_die(unsigned int cpu) -{ - /* We said "no" in __cpu_disable */ - BUG(); -} - void native_play_dead(void) { BUG(); diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 1b83377274b8..ca004e2e4469 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -38,102 +38,12 @@ static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); #ifdef CONFIG_HOTPLUG_CPU - -#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0 -static int cpu0_hotpluggable = 1; -#else -static int cpu0_hotpluggable; -static int __init enable_cpu0_hotplug(char *str) -{ - cpu0_hotpluggable = 1; - return 1; -} - -__setup("cpu0_hotplug", enable_cpu0_hotplug); -#endif - -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 -/* - * This function offlines a CPU as early as possible and allows userspace to - * boot up without the CPU. The CPU can be onlined back by user after boot. - * - * This is only called for debugging CPU offline/online feature. - */ -int _debug_hotplug_cpu(int cpu, int action) +int arch_register_cpu(int cpu) { - int ret; - - if (!cpu_is_hotpluggable(cpu)) - return -EINVAL; + struct x86_cpu *xc = per_cpu_ptr(&cpu_devices, cpu); - switch (action) { - case 0: - ret = remove_cpu(cpu); - if (!ret) - pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu); - else - pr_debug("Can't offline CPU%d.\n", cpu); - break; - case 1: - ret = add_cpu(cpu); - if (ret) - pr_debug("Can't online CPU%d.\n", cpu); - - break; - default: - ret = -EINVAL; - } - - return ret; -} - -static int __init debug_hotplug_cpu(void) -{ - _debug_hotplug_cpu(0, 0); - return 0; -} - -late_initcall_sync(debug_hotplug_cpu); -#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */ - -int arch_register_cpu(int num) -{ - struct cpuinfo_x86 *c = &cpu_data(num); - - /* - * Currently CPU0 is only hotpluggable on Intel platforms. Other - * vendors can add hotplug support later. - * Xen PV guests don't support CPU0 hotplug at all. - */ - if (c->x86_vendor != X86_VENDOR_INTEL || - cpu_feature_enabled(X86_FEATURE_XENPV)) - cpu0_hotpluggable = 0; - - /* - * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate - * depends on BSP. PIC interrupts depend on BSP. - * - * If the BSP dependencies are under control, one can tell kernel to - * enable BSP hotplug. This basically adds a control file and - * one can attempt to offline BSP. - */ - if (num == 0 && cpu0_hotpluggable) { - unsigned int irq; - /* - * We won't take down the boot processor on i386 if some - * interrupts only are able to be serviced by the BSP in PIC. - */ - for_each_active_irq(irq) { - if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) { - cpu0_hotpluggable = 0; - break; - } - } - } - if (num || cpu0_hotpluggable) - per_cpu(cpu_devices, num).cpu.hotpluggable = 1; - - return register_cpu(&per_cpu(cpu_devices, num).cpu, num); + xc->cpu.hotpluggable = cpu > 0; + return register_cpu(&xc->cpu, cpu); } EXPORT_SYMBOL(arch_register_cpu); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 344698852146..3425c6a943e4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -69,12 +69,10 @@ static int __init tsc_early_khz_setup(char *buf) } early_param("tsc_early_khz", tsc_early_khz_setup); -__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) +__always_inline void __cyc2ns_read(struct cyc2ns_data *data) { int seq, idx; - preempt_disable_notrace(); - do { seq = this_cpu_read(cyc2ns.seq.seqcount.sequence); idx = seq & 1; @@ -86,6 +84,12 @@ __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence))); } +__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) +{ + preempt_disable_notrace(); + __cyc2ns_read(data); +} + __always_inline void cyc2ns_read_end(void) { preempt_enable_notrace(); @@ -115,18 +119,25 @@ __always_inline void cyc2ns_read_end(void) * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long __cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; - cyc2ns_read_begin(&data); + __cyc2ns_read(&data); ns = data.cyc2ns_offset; ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift); - cyc2ns_read_end(); + return ns; +} +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + unsigned long long ns; + preempt_disable_notrace(); + ns = __cycles_2_ns(cyc); + preempt_enable_notrace(); return ns; } @@ -223,7 +234,7 @@ noinstr u64 native_sched_clock(void) u64 tsc_now = rdtsc(); /* return the value in ns */ - return cycles_2_ns(tsc_now); + return __cycles_2_ns(tsc_now); } /* @@ -250,7 +261,7 @@ u64 native_sched_clock_from_tsc(u64 tsc) /* We need to define a real function for sched_clock, to override the weak default version */ #ifdef CONFIG_PARAVIRT -noinstr u64 sched_clock(void) +noinstr u64 sched_clock_noinstr(void) { return paravirt_sched_clock(); } @@ -260,11 +271,20 @@ bool using_native_sched_clock(void) return static_call_query(pv_sched_clock) == native_sched_clock; } #else -u64 sched_clock(void) __attribute__((alias("native_sched_clock"))); +u64 sched_clock_noinstr(void) __attribute__((alias("native_sched_clock"))); bool using_native_sched_clock(void) { return true; } #endif +notrace u64 sched_clock(void) +{ + u64 now; + preempt_disable_notrace(); + now = sched_clock_noinstr(); + preempt_enable_notrace(); + return now; +} + int check_tsc_unstable(void) { return tsc_unstable; @@ -1598,10 +1618,7 @@ void __init tsc_init(void) #ifdef CONFIG_SMP /* - * If we have a constant TSC and are using the TSC for the delay loop, - * we can skip clock calibration if another cpu in the same socket has already - * been calibrated. This assumes that CONSTANT_TSC applies to all - * cpus in the socket - this should be a safe assumption. + * Check whether existing calibration data can be reused. */ unsigned long calibrate_delay_is_known(void) { @@ -1609,6 +1626,21 @@ unsigned long calibrate_delay_is_known(void) int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); const struct cpumask *mask = topology_core_cpumask(cpu); + /* + * If TSC has constant frequency and TSC is synchronized across + * sockets then reuse CPU0 calibration. + */ + if (constant_tsc && !tsc_unstable) + return cpu_data(0).loops_per_jiffy; + + /* + * If TSC has constant frequency and TSC is not synchronized across + * sockets and this is not the first CPU in the socket, then reuse + * the calibration value of an already online CPU on that socket. + * + * This assumes that CONSTANT_TSC is consistent for all CPUs in a + * socket. + */ if (!constant_tsc || !mask) return 0; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9452dc9664b5..bbc440c93e08 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -245,7 +245,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) */ static atomic_t start_count; static atomic_t stop_count; -static atomic_t skip_test; static atomic_t test_runs; /* @@ -344,21 +343,14 @@ static inline unsigned int loop_timeout(int cpu) } /* - * Source CPU calls into this - it waits for the freshly booted - * target CPU to arrive and then starts the measurement: + * The freshly booted CPU initiates this via an async SMP function call. */ -void check_tsc_sync_source(int cpu) +static void check_tsc_sync_source(void *__cpu) { + unsigned int cpu = (unsigned long)__cpu; int cpus = 2; /* - * No need to check if we already know that the TSC is not - * synchronized or if we have no TSC. - */ - if (unsynchronized_tsc()) - return; - - /* * Set the maximum number of test runs to * 1 if the CPU does not provide the TSC_ADJUST MSR * 3 if the MSR is available, so the target can try to adjust @@ -368,16 +360,9 @@ void check_tsc_sync_source(int cpu) else atomic_set(&test_runs, 3); retry: - /* - * Wait for the target to start or to skip the test: - */ - while (atomic_read(&start_count) != cpus - 1) { - if (atomic_read(&skip_test) > 0) { - atomic_set(&skip_test, 0); - return; - } + /* Wait for the target to start. */ + while (atomic_read(&start_count) != cpus - 1) cpu_relax(); - } /* * Trigger the target to continue into the measurement too: @@ -397,14 +382,14 @@ retry: if (!nr_warps) { atomic_set(&test_runs, 0); - pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + pr_debug("TSC synchronization [CPU#%d -> CPU#%u]: passed\n", smp_processor_id(), cpu); } else if (atomic_dec_and_test(&test_runs) || random_warps) { /* Force it to 0 if random warps brought us here */ atomic_set(&test_runs, 0); - pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n", + pr_warn("TSC synchronization [CPU#%d -> CPU#%u]:\n", smp_processor_id(), cpu); pr_warn("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); @@ -457,11 +442,12 @@ void check_tsc_sync_target(void) * SoCs the TSC is frequency synchronized, but still the TSC ADJUST * register might have been wreckaged by the BIOS.. */ - if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) { - atomic_inc(&skip_test); + if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) return; - } + /* Kick the control CPU into the TSC synchronization function */ + smp_call_function_single(cpumask_first(cpu_online_mask), check_tsc_sync_source, + (unsigned long *)(unsigned long)cpu, 0); retry: /* * Register this CPU's participation and wait for the diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 3ac50b7298d1..7e574cf3bf8a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -7,14 +7,23 @@ #include <asm/unwind.h> #include <asm/orc_types.h> #include <asm/orc_lookup.h> +#include <asm/orc_header.h> + +ORC_HEADER; #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) #define orc_warn_current(args...) \ ({ \ - if (state->task == current && !state->error) \ + static bool dumped_before; \ + if (state->task == current && !state->error) { \ orc_warn(args); \ + if (unwind_debug && !dumped_before) { \ + dumped_before = true; \ + unwind_dump(state); \ + } \ + } \ }) extern int __start_orc_unwind_ip[]; @@ -23,8 +32,49 @@ extern struct orc_entry __start_orc_unwind[]; extern struct orc_entry __stop_orc_unwind[]; static bool orc_init __ro_after_init; +static bool unwind_debug __ro_after_init; static unsigned int lookup_num_blocks __ro_after_init; +static int __init unwind_debug_cmdline(char *str) +{ + unwind_debug = true; + + return 0; +} +early_param("unwind_debug", unwind_debug_cmdline); + +static void unwind_dump(struct unwind_state *state) +{ + static bool dumped_before; + unsigned long word, *sp; + struct stack_info stack_info = {0}; + unsigned long visit_mask = 0; + + if (dumped_before) + return; + + dumped_before = true; + + printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n", + state->stack_info.type, state->stack_info.next_sp, + state->stack_mask, state->graph_idx); + + for (sp = __builtin_frame_address(0); sp; + sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + if (get_stack_info(sp, state->task, &stack_info, &visit_mask)) + break; + + for (; sp < stack_info.end; sp++) { + + word = READ_ONCE_NOCHECK(*sp); + + printk_deferred("%0*lx: %0*lx (%pB)\n", BITS_PER_LONG/4, + (unsigned long)sp, BITS_PER_LONG/4, + word, (void *)word); + } + } +} + static inline unsigned long orc_ip(const int *ip) { return (unsigned long)ip + *ip; @@ -136,21 +186,6 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; -#ifdef CONFIG_CALL_THUNKS -static struct orc_entry *orc_callthunk_find(unsigned long ip) -{ - if (!is_callthunk((void *)ip)) - return NULL; - - return &null_orc_entry; -} -#else -static struct orc_entry *orc_callthunk_find(unsigned long ip) -{ - return NULL; -} -#endif - /* Fake frame pointer entry -- used as a fallback for generated code */ static struct orc_entry orc_fp_entry = { .type = ORC_TYPE_CALL, @@ -203,11 +238,7 @@ static struct orc_entry *orc_find(unsigned long ip) if (orc) return orc; - orc = orc_ftrace_find(ip); - if (orc) - return orc; - - return orc_callthunk_find(ip); + return orc_ftrace_find(ip); } #ifdef CONFIG_MODULES @@ -219,7 +250,6 @@ static struct orc_entry *cur_orc_table = __start_orc_unwind; static void orc_sort_swap(void *_a, void *_b, int size) { struct orc_entry *orc_a, *orc_b; - struct orc_entry orc_tmp; int *a = _a, *b = _b, tmp; int delta = _b - _a; @@ -231,9 +261,7 @@ static void orc_sort_swap(void *_a, void *_b, int size) /* Swap the corresponding .orc_unwind entries: */ orc_a = cur_orc_table + (a - cur_orc_ip_table); orc_b = cur_orc_table + (b - cur_orc_ip_table); - orc_tmp = *orc_a; - *orc_a = *orc_b; - *orc_b = orc_tmp; + swap(*orc_a, *orc_b); } static int orc_sort_cmp(const void *_a, const void *_b) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 25f155205770..03c885d3640f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -508,4 +508,8 @@ INIT_PER_CPU(irq_stack_backing_store); "fixed_percpu_data is not at start of per-cpu area"); #endif +#ifdef CONFIG_RETHUNK +. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned"); +#endif + #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index d82f4fa2f1bf..a37ebd3b4773 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -126,12 +126,13 @@ struct x86_init_ops x86_init __initdata = { struct x86_cpuinit_ops x86_cpuinit = { .early_percpu_clock_init = x86_init_noop, .setup_percpu_clockev = setup_secondary_APIC_clock, + .parallel_bringup = true, }; static void default_nmi_init(void) { }; -static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } -static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } +static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } static bool is_private_mmio_noop(u64 addr) {return false; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 123bf8b97a4b..0c9660a07b23 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -253,7 +253,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e int nent) { struct kvm_cpuid_entry2 *best; - u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent); best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (best) { @@ -292,21 +291,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT); } - - /* - * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate - * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's - * requested XCR0 value. The enclave's XFRM must be a subset of XCRO - * at the time of EENTER, thus adjust the allowed XFRM by the guest's - * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to - * '1' even on CPUs that don't support XSAVE. - */ - best = cpuid_entry2_find(entries, nent, 0x12, 0x1); - if (best) { - best->ecx &= guest_supported_xcr0 & 0xffffffff; - best->edx &= guest_supported_xcr0 >> 32; - best->ecx |= XFEATURE_MASK_FPSSE; - } } void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e542cf285b51..3c300a196bdf 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -229,6 +229,23 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new, u32 physical_id; /* + * For simplicity, KVM always allocates enough space for all possible + * xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on + * without the optimized map. + */ + if (WARN_ON_ONCE(xapic_id > new->max_apic_id)) + return -EINVAL; + + /* + * Bail if a vCPU was added and/or enabled its APIC between allocating + * the map and doing the actual calculations for the map. Note, KVM + * hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if + * the compiler decides to reload x2apic_id after this check. + */ + if (x2apic_id > new->max_apic_id) + return -E2BIG; + + /* * Deliberately truncate the vCPU ID when detecting a mismatched APIC * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a * 32-bit value. Any unwanted aliasing due to truncation results will @@ -253,8 +270,7 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new, */ if (vcpu->kvm->arch.x2apic_format) { /* See also kvm_apic_match_physical_addr(). */ - if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && - x2apic_id <= new->max_apic_id) + if (apic_x2apic_mode(apic) || x2apic_id > 0xff) new->phys_map[x2apic_id] = apic; if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c8961f45e3b1..6eaa3d6994ae 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7091,7 +7091,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) */ slot = NULL; if (atomic_read(&kvm->nr_memslots_dirty_logging)) { - slot = gfn_to_memslot(kvm, sp->gfn); + struct kvm_memslots *slots; + + slots = kvm_memslots_for_spte_role(kvm, sp->role); + slot = __gfn_to_memslot(slots, sp->gfn); WARN_ON_ONCE(!slot); } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ca32389f3c36..54089f990c8f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3510,7 +3510,7 @@ static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu) if (!is_vnmi_enabled(svm)) return false; - return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK); + return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK); } static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 0574030b071f..2261b684a7d4 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -170,12 +170,19 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, return 1; } - /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */ + /* + * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note + * that the allowed XFRM (XFeature Request Mask) isn't strictly bound + * by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE + * is unsupported, i.e. even if XCR0 itself is completely unsupported. + */ if ((u32)miscselect & ~sgx_12_0->ebx || (u32)attributes & ~sgx_12_1->eax || (u32)(attributes >> 32) & ~sgx_12_1->ebx || (u32)xfrm & ~sgx_12_1->ecx || - (u32)(xfrm >> 32) & ~sgx_12_1->edx) { + (u32)(xfrm >> 32) & ~sgx_12_1->edx || + xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) || + (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { kvm_inject_gp(vcpu, 0); return 1; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ceb7c5e9cf9e..7f70207e8689 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1446,7 +1446,7 @@ static const u32 msrs_to_save_base[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_SPEC_CTRL, + MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL, MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, @@ -2799,14 +2799,13 @@ static u64 read_tsc(void) static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp, int *mode) { - long v; u64 tsc_pg_val; + long v; switch (clock->vclock_mode) { case VDSO_CLOCKMODE_HVCLOCK: - tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), - tsc_timestamp); - if (tsc_pg_val != U64_MAX) { + if (hv_read_tsc_page_tsc(hv_get_tsc_page(), + tsc_timestamp, &tsc_pg_val)) { /* TSC page valid */ *mode = VDSO_CLOCKMODE_HVCLOCK; v = (tsc_pg_val - clock->cycle_last) & @@ -7155,6 +7154,10 @@ static void kvm_probe_msr_to_save(u32 msr_index) if (!kvm_cpu_cap_has(X86_FEATURE_XFD)) return; break; + case MSR_IA32_TSX_CTRL: + if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR)) + return; + break; default: break; } @@ -10754,6 +10757,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; break; } + + /* Note, VM-Exits that go down the "slow" path are accounted below. */ + ++vcpu->stat.exits; } /* @@ -13155,7 +13161,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) { - return arch_atomic_read(&kvm->arch.assigned_device_count); + return raw_atomic_read(&kvm->arch.assigned_device_count); } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 01932af64193..ea3a28e7b613 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y) lib-y += strstr_32.o lib-y += string_32.o lib-y += memmove_32.o + lib-y += cmpxchg8b_emu.o ifneq ($(CONFIG_X86_CMPXCHG64),y) - lib-y += cmpxchg8b_emu.o atomic64_386_32.o + lib-y += atomic64_386_32.o endif else obj-y += iomap_copy_64.o diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 33c70c0160ea..6962df315793 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -1,47 +1,54 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/linkage.h> #include <asm/percpu.h> +#include <asm/processor-flags.h> .text /* + * Emulate 'cmpxchg16b %gs:(%rsi)' + * * Inputs: * %rsi : memory location to compare * %rax : low 64 bits of old value * %rdx : high 64 bits of old value * %rbx : low 64 bits of new value * %rcx : high 64 bits of new value - * %al : Operation successful + * + * Notably this is not LOCK prefixed and is not safe against NMIs */ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) -# -# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not -# via the ZF. Caller will access %al to get result. -# -# Note that this is only useful for a cpuops operation. Meaning that we -# do *not* have a fully atomic operation but just an operation that is -# *atomic* on a single cpu (as provided by the this_cpu_xx class of -# macros). -# pushfq cli - cmpq PER_CPU_VAR((%rsi)), %rax - jne .Lnot_same - cmpq PER_CPU_VAR(8(%rsi)), %rdx - jne .Lnot_same + /* if (*ptr == old) */ + cmpq PER_CPU_VAR(0(%rsi)), %rax + jne .Lnot_same + cmpq PER_CPU_VAR(8(%rsi)), %rdx + jne .Lnot_same - movq %rbx, PER_CPU_VAR((%rsi)) - movq %rcx, PER_CPU_VAR(8(%rsi)) + /* *ptr = new */ + movq %rbx, PER_CPU_VAR(0(%rsi)) + movq %rcx, PER_CPU_VAR(8(%rsi)) + + /* set ZF in EFLAGS to indicate success */ + orl $X86_EFLAGS_ZF, (%rsp) popfq - mov $1, %al RET .Lnot_same: + /* *ptr != old */ + + /* old = *ptr */ + movq PER_CPU_VAR(0(%rsi)), %rax + movq PER_CPU_VAR(8(%rsi)), %rdx + + /* clear ZF in EFLAGS to indicate failure */ + andl $(~X86_EFLAGS_ZF), (%rsp) + popfq - xor %al,%al RET SYM_FUNC_END(this_cpu_cmpxchg16b_emu) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 6a912d58fecc..49805257b125 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -2,10 +2,16 @@ #include <linux/linkage.h> #include <asm/export.h> +#include <asm/percpu.h> +#include <asm/processor-flags.h> .text +#ifndef CONFIG_X86_CMPXCHG64 + /* + * Emulate 'cmpxchg8b (%esi)' on UP + * * Inputs: * %esi : memory location to compare * %eax : low 32 bits of old value @@ -15,32 +21,65 @@ */ SYM_FUNC_START(cmpxchg8b_emu) -# -# Emulate 'cmpxchg8b (%esi)' on UP except we don't -# set the whole ZF thing (caller will just compare -# eax:edx with the expected value) -# pushfl cli - cmpl (%esi), %eax - jne .Lnot_same - cmpl 4(%esi), %edx - jne .Lhalf_same + cmpl 0(%esi), %eax + jne .Lnot_same + cmpl 4(%esi), %edx + jne .Lnot_same + + movl %ebx, 0(%esi) + movl %ecx, 4(%esi) - movl %ebx, (%esi) - movl %ecx, 4(%esi) + orl $X86_EFLAGS_ZF, (%esp) popfl RET .Lnot_same: - movl (%esi), %eax -.Lhalf_same: - movl 4(%esi), %edx + movl 0(%esi), %eax + movl 4(%esi), %edx + + andl $(~X86_EFLAGS_ZF), (%esp) popfl RET SYM_FUNC_END(cmpxchg8b_emu) EXPORT_SYMBOL(cmpxchg8b_emu) + +#endif + +#ifndef CONFIG_UML + +SYM_FUNC_START(this_cpu_cmpxchg8b_emu) + + pushfl + cli + + cmpl PER_CPU_VAR(0(%esi)), %eax + jne .Lnot_same2 + cmpl PER_CPU_VAR(4(%esi)), %edx + jne .Lnot_same2 + + movl %ebx, PER_CPU_VAR(0(%esi)) + movl %ecx, PER_CPU_VAR(4(%esi)) + + orl $X86_EFLAGS_ZF, (%esp) + + popfl + RET + +.Lnot_same2: + movl PER_CPU_VAR(0(%esi)), %eax + movl PER_CPU_VAR(4(%esi)), %edx + + andl $(~X86_EFLAGS_ZF), (%esp) + + popfl + RET + +SYM_FUNC_END(this_cpu_cmpxchg8b_emu) + +#endif diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 4fc5c2de2de4..01c5de4c279b 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -7,6 +7,8 @@ */ #include <linux/linkage.h> +#include <asm/cpufeatures.h> +#include <asm/alternative.h> #include <asm/asm.h> #include <asm/export.h> @@ -29,7 +31,7 @@ */ SYM_FUNC_START(rep_movs_alternative) cmpq $64,%rcx - jae .Lunrolled + jae .Llarge cmp $8,%ecx jae .Lword @@ -65,6 +67,12 @@ SYM_FUNC_START(rep_movs_alternative) _ASM_EXTABLE_UA( 2b, .Lcopy_user_tail) _ASM_EXTABLE_UA( 3b, .Lcopy_user_tail) +.Llarge: +0: ALTERNATIVE "jmp .Lunrolled", "rep movsb", X86_FEATURE_ERMS +1: RET + + _ASM_EXTABLE_UA( 0b, 1b) + .p2align 4 .Lunrolled: 10: movq (%rsi),%r8 diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index 50734a23034c..cea25ca8b8cf 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -5,22 +5,34 @@ * This file contains network checksum routines that are better done * in an architecture-specific manner due to speed. */ - + #include <linux/compiler.h> #include <linux/export.h> #include <asm/checksum.h> #include <asm/word-at-a-time.h> -static inline unsigned short from32to16(unsigned a) +static inline unsigned short from32to16(unsigned a) { - unsigned short b = a >> 16; + unsigned short b = a >> 16; asm("addw %w2,%w0\n\t" - "adcw $0,%w0\n" + "adcw $0,%w0\n" : "=r" (b) : "0" (b), "r" (a)); return b; } +static inline __wsum csum_tail(u64 temp64, int odd) +{ + unsigned int result; + + result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); + if (unlikely(odd)) { + result = from32to16(result); + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); + } + return (__force __wsum)result; +} + /* * Do a checksum on an arbitrary memory area. * Returns a 32bit checksum. @@ -35,7 +47,7 @@ static inline unsigned short from32to16(unsigned a) __wsum csum_partial(const void *buff, int len, __wsum sum) { u64 temp64 = (__force u64)sum; - unsigned odd, result; + unsigned odd; odd = 1 & (unsigned long) buff; if (unlikely(odd)) { @@ -47,21 +59,52 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) buff++; } - while (unlikely(len >= 64)) { + /* + * len == 40 is the hot case due to IPv6 headers, but annotating it likely() + * has noticeable negative affect on codegen for all other cases with + * minimal performance benefit here. + */ + if (len == 40) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq 1*8(%[src]),%[res]\n\t" "adcq 2*8(%[src]),%[res]\n\t" "adcq 3*8(%[src]),%[res]\n\t" "adcq 4*8(%[src]),%[res]\n\t" - "adcq 5*8(%[src]),%[res]\n\t" - "adcq 6*8(%[src]),%[res]\n\t" - "adcq 7*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); - buff += 64; - len -= 64; + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[40])buff)); + return csum_tail(temp64, odd); + } + if (unlikely(len >= 64)) { + /* + * Extra accumulators for better ILP in the loop. + */ + u64 tmp_accum, tmp_carries; + + asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t" + "xorl %k[tmp_carries],%k[tmp_carries]\n\t" + "subl $64, %[len]\n\t" + "1:\n\t" + "addq 0*8(%[src]),%[res]\n\t" + "adcq 1*8(%[src]),%[res]\n\t" + "adcq 2*8(%[src]),%[res]\n\t" + "adcq 3*8(%[src]),%[res]\n\t" + "adcl $0,%k[tmp_carries]\n\t" + "addq 4*8(%[src]),%[tmp_accum]\n\t" + "adcq 5*8(%[src]),%[tmp_accum]\n\t" + "adcq 6*8(%[src]),%[tmp_accum]\n\t" + "adcq 7*8(%[src]),%[tmp_accum]\n\t" + "adcl $0,%k[tmp_carries]\n\t" + "addq $64, %[src]\n\t" + "subl $64, %[len]\n\t" + "jge 1b\n\t" + "addq %[tmp_accum],%[res]\n\t" + "adcq %[tmp_carries],%[res]\n\t" + "adcq $0,%[res]" + : [tmp_accum] "=&r"(tmp_accum), + [tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64), + [len] "+r"(len), [src] "+r"(buff) + : "m"(*(const char *)buff)); } if (len & 32) { @@ -70,45 +113,37 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) "adcq 2*8(%[src]),%[res]\n\t" "adcq 3*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[32])buff)); buff += 32; } if (len & 16) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq 1*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[16])buff)); buff += 16; } if (len & 8) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[8])buff)); buff += 8; } if (len & 7) { - unsigned int shift = (8 - (len & 7)) * 8; + unsigned int shift = (-len << 3) & 63; unsigned long trail; trail = (load_unaligned_zeropad(buff) << shift) >> shift; asm("addq %[trail],%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [trail] "r" (trail)); + : [res] "+r"(temp64) + : [trail] "r"(trail)); } - result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); - if (unlikely(odd)) { - result = from32to16(result); - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); - } - return (__force __wsum)result; + return csum_tail(temp64, odd); } EXPORT_SYMBOL(csum_partial); @@ -118,6 +153,6 @@ EXPORT_SYMBOL(csum_partial); */ __sum16 ip_compute_csum(const void *buff, int len) { - return csum_fold(csum_partial(buff,len,0)); + return csum_fold(csum_partial(buff, len, 0)); } EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index b64a2bd1a1ef..9c63713477bb 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -143,43 +143,43 @@ SYM_FUNC_END(__get_user_nocheck_8) EXPORT_SYMBOL(__get_user_nocheck_8) -SYM_CODE_START_LOCAL(.Lbad_get_user_clac) +SYM_CODE_START_LOCAL(__get_user_handle_exception) ASM_CLAC .Lbad_get_user: xor %edx,%edx mov $(-EFAULT),%_ASM_AX RET -SYM_CODE_END(.Lbad_get_user_clac) +SYM_CODE_END(__get_user_handle_exception) #ifdef CONFIG_X86_32 -SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac) +SYM_CODE_START_LOCAL(__get_user_8_handle_exception) ASM_CLAC bad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX RET -SYM_CODE_END(.Lbad_get_user_8_clac) +SYM_CODE_END(__get_user_8_handle_exception) #endif /* get_user */ - _ASM_EXTABLE(1b, .Lbad_get_user_clac) - _ASM_EXTABLE(2b, .Lbad_get_user_clac) - _ASM_EXTABLE(3b, .Lbad_get_user_clac) + _ASM_EXTABLE(1b, __get_user_handle_exception) + _ASM_EXTABLE(2b, __get_user_handle_exception) + _ASM_EXTABLE(3b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(4b, .Lbad_get_user_clac) + _ASM_EXTABLE(4b, __get_user_handle_exception) #else - _ASM_EXTABLE(4b, .Lbad_get_user_8_clac) - _ASM_EXTABLE(5b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(4b, __get_user_8_handle_exception) + _ASM_EXTABLE(5b, __get_user_8_handle_exception) #endif /* __get_user */ - _ASM_EXTABLE(6b, .Lbad_get_user_clac) - _ASM_EXTABLE(7b, .Lbad_get_user_clac) - _ASM_EXTABLE(8b, .Lbad_get_user_clac) + _ASM_EXTABLE(6b, __get_user_handle_exception) + _ASM_EXTABLE(7b, __get_user_handle_exception) + _ASM_EXTABLE(8b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(9b, .Lbad_get_user_clac) + _ASM_EXTABLE(9b, __get_user_handle_exception) #else - _ASM_EXTABLE(9b, .Lbad_get_user_8_clac) - _ASM_EXTABLE(10b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(9b, __get_user_8_handle_exception) + _ASM_EXTABLE(10b, __get_user_8_handle_exception) #endif diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 02661861e5dd..0559b206fb11 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -38,10 +38,12 @@ SYM_FUNC_START(__memmove) cmp %rdi, %r8 jg 2f - /* FSRM implies ERMS => no length checks, do the copy directly */ +#define CHECK_LEN cmp $0x20, %rdx; jb 1f +#define MEMMOVE_BYTES movq %rdx, %rcx; rep movsb; RET .Lmemmove_begin_forward: - ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS + ALTERNATIVE_2 __stringify(CHECK_LEN), \ + __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \ + __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM /* * movsq instruction have many startup latency @@ -207,11 +209,6 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET - -.Lmemmove_erms: - movq %rdx, %rcx - rep movsb - RET SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index b09cd2ad426c..47fd9bd6b91d 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -27,14 +27,14 @@ void msrs_free(struct msr *msrs) EXPORT_SYMBOL(msrs_free); /** - * Read an MSR with error handling - * + * msr_read - Read an MSR with error handling * @msr: MSR to read * @m: value to read into * * It returns read data only on success, otherwise it doesn't change the output * argument @m. * + * Return: %0 for success, otherwise an error code */ static int msr_read(u32 msr, struct msr *m) { @@ -49,10 +49,12 @@ static int msr_read(u32 msr, struct msr *m) } /** - * Write an MSR with error handling + * msr_write - Write an MSR with error handling * * @msr: MSR to write * @m: value to write + * + * Return: %0 for success, otherwise an error code */ static int msr_write(u32 msr, struct msr *m) { @@ -88,12 +90,14 @@ static inline int __flip_bit(u32 msr, u8 bit, bool set) } /** - * Set @bit in a MSR @msr. + * msr_set_bit - Set @bit in a MSR @msr. + * @msr: MSR to write + * @bit: bit number to set * - * Retval: - * < 0: An error was encountered. - * = 0: Bit was already set. - * > 0: Hardware accepted the MSR write. + * Return: + * * < 0: An error was encountered. + * * = 0: Bit was already set. + * * > 0: Hardware accepted the MSR write. */ int msr_set_bit(u32 msr, u8 bit) { @@ -101,12 +105,14 @@ int msr_set_bit(u32 msr, u8 bit) } /** - * Clear @bit in a MSR @msr. + * msr_clear_bit - Clear @bit in a MSR @msr. + * @msr: MSR to write + * @bit: bit number to clear * - * Retval: - * < 0: An error was encountered. - * = 0: Bit was already cleared. - * > 0: Hardware accepted the MSR write. + * Return: + * * < 0: An error was encountered. + * * = 0: Bit was already cleared. + * * > 0: Hardware accepted the MSR write. */ int msr_clear_bit(u32 msr, u8 bit) { diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 3062d09a776d..1451e0c4ae22 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -131,22 +131,22 @@ SYM_FUNC_START(__put_user_nocheck_8) SYM_FUNC_END(__put_user_nocheck_8) EXPORT_SYMBOL(__put_user_nocheck_8) -SYM_CODE_START_LOCAL(.Lbad_put_user_clac) +SYM_CODE_START_LOCAL(__put_user_handle_exception) ASM_CLAC .Lbad_put_user: movl $-EFAULT,%ecx RET -SYM_CODE_END(.Lbad_put_user_clac) +SYM_CODE_END(__put_user_handle_exception) - _ASM_EXTABLE(1b, .Lbad_put_user_clac) - _ASM_EXTABLE(2b, .Lbad_put_user_clac) - _ASM_EXTABLE(3b, .Lbad_put_user_clac) - _ASM_EXTABLE(4b, .Lbad_put_user_clac) - _ASM_EXTABLE(5b, .Lbad_put_user_clac) - _ASM_EXTABLE(6b, .Lbad_put_user_clac) - _ASM_EXTABLE(7b, .Lbad_put_user_clac) - _ASM_EXTABLE(9b, .Lbad_put_user_clac) + _ASM_EXTABLE(1b, __put_user_handle_exception) + _ASM_EXTABLE(2b, __put_user_handle_exception) + _ASM_EXTABLE(3b, __put_user_handle_exception) + _ASM_EXTABLE(4b, __put_user_handle_exception) + _ASM_EXTABLE(5b, __put_user_handle_exception) + _ASM_EXTABLE(6b, __put_user_handle_exception) + _ASM_EXTABLE(7b, __put_user_handle_exception) + _ASM_EXTABLE(9b, __put_user_handle_exception) #ifdef CONFIG_X86_32 - _ASM_EXTABLE(8b, .Lbad_put_user_clac) - _ASM_EXTABLE(10b, .Lbad_put_user_clac) + _ASM_EXTABLE(8b, __put_user_handle_exception) + _ASM_EXTABLE(10b, __put_user_handle_exception) #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b3b1e376dce8..3fd066d42ec0 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -143,7 +143,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) * from re-poisioning the BTB prediction. */ .align 64 - .skip 63, 0xcc + .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR /* diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 003d90138e20..e9251b89a9e9 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -9,6 +9,7 @@ #include <linux/export.h> #include <linux/uaccess.h> #include <linux/highmem.h> +#include <linux/libnvdimm.h> /* * Zero Userspace diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 7fe56c594aa6..91c52ead1226 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -32,6 +32,7 @@ #include <asm/traps.h> #include <asm/user.h> #include <asm/fpu/api.h> +#include <asm/fpu/regset.h> #include "fpu_system.h" #include "fpu_emu.h" diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 2c54b76d8f84..d9efa35711ee 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -3,6 +3,7 @@ #include <linux/export.h> #include <linux/swap.h> /* for totalram_pages */ #include <linux/memblock.h> +#include <asm/numa.h> void __init set_highmem_pages_init(void) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 3cdac0f0055d..8192452d1d2d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -9,6 +9,7 @@ #include <linux/sched/task.h> #include <asm/set_memory.h> +#include <asm/cpu_device_id.h> #include <asm/e820/api.h> #include <asm/init.h> #include <asm/page.h> @@ -261,6 +262,24 @@ static void __init probe_page_size_mask(void) } } +#define INTEL_MATCH(_model) { .vendor = X86_VENDOR_INTEL, \ + .family = 6, \ + .model = _model, \ + } +/* + * INVLPG may not properly flush Global entries + * on these CPUs when PCIDs are enabled. + */ +static const struct x86_cpu_id invlpg_miss_ids[] = { + INTEL_MATCH(INTEL_FAM6_ALDERLAKE ), + INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ), + INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ), + INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ), + INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P), + INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S), + {} +}; + static void setup_pcid(void) { if (!IS_ENABLED(CONFIG_X86_64)) @@ -269,6 +288,12 @@ static void setup_pcid(void) if (!boot_cpu_has(X86_FEATURE_PCID)) return; + if (x86_match_cpu(invlpg_miss_ids)) { + pr_info("Incomplete global flushes, disabling PCID"); + setup_clear_cpu_cap(X86_FEATURE_PCID); + return; + } + if (boot_cpu_has(X86_FEATURE_PGE)) { /* * This can't be cr4_set_bits_and_update_boot() -- the diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d4e2648a1dfb..b63403d7179d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -45,7 +45,6 @@ #include <asm/olpc_ofw.h> #include <asm/pgalloc.h> #include <asm/sections.h> -#include <asm/paravirt.h> #include <asm/setup.h> #include <asm/set_memory.h> #include <asm/page_types.h> @@ -74,7 +73,6 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { pmd_table = (pmd_t *)alloc_low_page(); - paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); p4d = p4d_offset(pgd, 0); pud = pud_offset(p4d, 0); @@ -99,7 +97,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = (pte_t *)alloc_low_page(); - paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } @@ -181,12 +178,10 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, set_pte(newpte + i, pte[i]); *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE); - paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); BUG_ON(newpte != pte_offset_kernel(pmd, 0)); __flush_tlb_all(); - paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); pte = newpte; } BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) @@ -482,7 +477,6 @@ void __init native_pagetable_init(void) pfn, pmd, __pa(pmd), pte, __pa(pte)); pte_clear(NULL, va, pte); } - paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); paging_init(); } @@ -491,15 +485,8 @@ void __init native_pagetable_init(void) * point, we've been running on some set of pagetables constructed by * the boot process. * - * If we're booting on native hardware, this will be a pagetable - * constructed in arch/x86/kernel/head_32.S. The root of the - * pagetable will be swapper_pg_dir. - * - * If we're booting paravirtualized under a hypervisor, then there are - * more options: we may already be running PAE, and the pagetable may - * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_init() will set up swapper_pg_dir - * appropriately for the rest of the initialization to work. + * This will be a pagetable constructed in arch/x86/kernel/head_32.S. + * The root of the pagetable will be swapper_pg_dir. * * In general, pagetable_init() assumes that the pagetable may already * be partially populated, and so it avoids stomping on any existing diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 557f0fe25dff..37db264866b6 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -172,10 +172,10 @@ void __meminit init_trampoline_kaslr(void) set_p4d(p4d_tramp, __p4d(_KERNPG_TABLE | __pa(pud_page_tramp))); - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); + trampoline_pgd_entry = + __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)); } else { - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); + trampoline_pgd_entry = + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); } } diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index e0b51c09109f..54bbd5163e8d 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -319,7 +319,7 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) #endif } -static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) { /* * To maintain the security guarantees of SEV-SNP guests, make sure @@ -327,6 +327,8 @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool */ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc) snp_set_memory_shared(vaddr, npages); + + return true; } /* Return true unconditionally: return value doesn't matter for the SEV side */ @@ -501,6 +503,21 @@ void __init sme_early_init(void) x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; + + /* + * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the + * parallel bringup low level code. That raises #VC which cannot be + * handled there. + * It does not provide a RDMSR GHCB protocol so the early startup + * code cannot directly communicate with the secure firmware. The + * alternative solution to retrieve the APIC ID via CPUID(0xb), + * which is covered by the GHCB protocol, is not viable either + * because there is no enforcement of the CPUID(0xb) provided + * "initial" APIC ID to be the same as the real APIC ID. + * Disable parallel bootup. + */ + if (sev_status & MSR_AMD64_SEV_ES_ENABLED) + x86_cpuinit.parallel_bringup = false; } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index c6efcf559d88..bfe22fd5a1d7 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -612,7 +612,7 @@ void __init sme_enable(struct boot_params *bp) out: if (sme_me_mask) { physical_mask &= ~sme_me_mask; - cc_set_vendor(CC_VENDOR_AMD); + cc_vendor = CC_VENDOR_AMD; cc_set_mask(sme_me_mask); } } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 7159cf787613..df4182b6449f 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/seq_file.h> +#include <linux/proc_fs.h> #include <linux/debugfs.h> #include <linux/pfn.h> #include <linux/percpu.h> @@ -231,7 +232,7 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end) * points to #2, but almost all physical-to-virtual translations point to #1. * * This is so that we can have both a directmap of all physical memory *and* - * take full advantage of the the limited (s32) immediate addressing range (2G) + * take full advantage of the limited (s32) immediate addressing range (2G) * of x86_64. * * See Documentation/arch/x86/x86_64/mm.rst for more detail. @@ -2151,7 +2152,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); /* Notify hypervisor that we are about to set/clr encryption attribute. */ - x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); + if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc)) + return -EIO; ret = __change_page_attr_set_clr(&cpa, 1); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e4f499eb0f29..15a8009a4480 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -702,14 +702,8 @@ void p4d_clear_huge(p4d_t *p4d) * pud_set_huge - setup kernel PUD mapping * * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this - * function sets up a huge page only if any of the following conditions are met: - * - * - MTRRs are disabled, or - * - * - MTRRs are enabled and the range is completely covered by a single MTRR, or - * - * - MTRRs are enabled and the corresponding MTRR memory type is WB, which - * has no effect on the requested PAT memory type. + * function sets up a huge page only if the complete range has the same MTRR + * caching mode. * * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger * page mapping attempt fails. @@ -718,11 +712,10 @@ void p4d_clear_huge(p4d_t *p4d) */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { - u8 mtrr, uniform; + u8 uniform; - mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); - if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && - (mtrr != MTRR_TYPE_WRBACK)) + mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); + if (!uniform) return 0; /* Bail out if we are we on a populated non-leaf entry: */ @@ -745,11 +738,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { - u8 mtrr, uniform; + u8 uniform; - mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); - if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && - (mtrr != MTRR_TYPE_WRBACK)) { + mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); + if (!uniform) { pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n", __func__, addr, addr + PMD_SIZE); return 0; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1056bbf55b17..438adb695daa 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2570,7 +2570,7 @@ out_image: } if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, proglen, pass + 1, image); + bpf_jit_dump(prog->len, proglen, pass + 1, rw_image); if (image) { if (!prog->is_func || extra_pass) { diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c index 584c25b588b4..87313701f069 100644 --- a/arch/x86/pci/ce4100.c +++ b/arch/x86/pci/ce4100.c @@ -83,7 +83,7 @@ static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value) *value |= 0x100; } -void sata_revid_init(struct sim_dev_reg *reg) +static void sata_revid_init(struct sim_dev_reg *reg) { reg->sim_reg.value = 0x01060100; reg->sim_reg.mask = 0; @@ -172,7 +172,7 @@ static inline void extract_bytes(u32 *value, int reg, int len) *value &= mask; } -int bridge_read(unsigned int devfn, int reg, int len, u32 *value) +static int bridge_read(unsigned int devfn, int reg, int len, u32 *value) { u32 av_bridge_base, av_bridge_limit; int retval = 0; diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 8babce71915f..014c508e914d 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -198,7 +198,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) i++; } kfree(v); - return 0; + return msi_device_populate_sysfs(&dev->dev); error: if (ret == -ENOSYS) @@ -254,7 +254,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) dev_dbg(&dev->dev, "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq); } - return 0; + return msi_device_populate_sysfs(&dev->dev); error: dev_err(&dev->dev, "Failed to create MSI%s! ret=%d!\n", @@ -346,7 +346,7 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (ret < 0) goto out; } - ret = 0; + ret = msi_device_populate_sysfs(&dev->dev); out: return ret; } @@ -394,6 +394,8 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) xen_destroy_irq(msidesc->irq + i); msidesc->irq = 0; } + + msi_device_destroy_sysfs(&dev->dev); } static void xen_pv_teardown_msi_irqs(struct pci_dev *dev) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f3f2d87cce1b..e9f99c56f3ce 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -96,6 +96,9 @@ static const unsigned long * const efi_tables[] = { #ifdef CONFIG_EFI_COCO_SECRET &efi.coco_secret, #endif +#ifdef CONFIG_UNACCEPTED_MEMORY + &efi.unaccepted, +#endif }; u64 efi_setup; /* efi setup_data physical address */ diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 75e3319e8bee..74ebd6882690 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -234,7 +234,7 @@ static int __init olpc_dt_compatible_match(phandle node, const char *compat) return 0; } -void __init olpc_dt_fixup(void) +static void __init olpc_dt_fixup(void) { phandle node; u32 board_rev; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 7a4d5e911415..63230ff8cf4f 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -351,43 +351,6 @@ static int bsp_pm_callback(struct notifier_block *nb, unsigned long action, case PM_HIBERNATION_PREPARE: ret = bsp_check(); break; -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 - case PM_RESTORE_PREPARE: - /* - * When system resumes from hibernation, online CPU0 because - * 1. it's required for resume and - * 2. the CPU was online before hibernation - */ - if (!cpu_online(0)) - _debug_hotplug_cpu(0, 1); - break; - case PM_POST_RESTORE: - /* - * When a resume really happens, this code won't be called. - * - * This code is called only when user space hibernation software - * prepares for snapshot device during boot time. So we just - * call _debug_hotplug_cpu() to restore to CPU0's state prior to - * preparing the snapshot device. - * - * This works for normal boot case in our CPU0 hotplug debug - * mode, i.e. CPU0 is offline and user mode hibernation - * software initializes during boot time. - * - * If CPU0 is online and user application accesses snapshot - * device after boot time, this will offline CPU0 and user may - * see different CPU0 state before and after accessing - * the snapshot device. But hopefully this is not a case when - * user debugging CPU0 hotplug. Even if users hit this case, - * they can easily online CPU0 back. - * - * To simplify this debug code, we only consider normal boot - * case. Otherwise we need to remember CPU0's state and restore - * to that state and resolve racy conditions etc. - */ - _debug_hotplug_cpu(0, 0); - break; -#endif default: break; } diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 005324d6c76b..c2a29be35c01 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index af565816d2ba..788e5559549f 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -154,6 +154,9 @@ static void __init setup_real_mode(void) trampoline_header->flags = 0; + trampoline_lock = &trampoline_header->lock; + *trampoline_lock = 0; + trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); /* Map the real mode stub as virtual == physical */ diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index e38d61d6562e..c9f76fae902e 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -37,6 +37,24 @@ .text .code16 +.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0 + /* + * Make sure only one CPU fiddles with the realmode stack + */ +.Llock_rm\@: + .if \lock_pa + lock btsl $0, pa_tr_lock + .else + lock btsl $0, tr_lock + .endif + jnc 2f + pause + jmp .Llock_rm\@ +2: + # Setup stack + movl $rm_stack_end, %esp +.endm + .balign PAGE_SIZE SYM_CODE_START(trampoline_start) cli # We should be safe anyway @@ -49,8 +67,7 @@ SYM_CODE_START(trampoline_start) mov %ax, %es mov %ax, %ss - # Setup stack - movl $rm_stack_end, %esp + LOCK_AND_LOAD_REALMODE_ESP call verify_cpu # Verify the cpu supports long mode testl %eax, %eax # Check for return code @@ -93,8 +110,7 @@ SYM_CODE_START(sev_es_trampoline_start) mov %ax, %es mov %ax, %ss - # Setup stack - movl $rm_stack_end, %esp + LOCK_AND_LOAD_REALMODE_ESP jmp .Lswitch_to_protected SYM_CODE_END(sev_es_trampoline_start) @@ -177,7 +193,7 @@ SYM_CODE_START(pa_trampoline_compat) * In compatibility mode. Prep ESP and DX for startup_32, then disable * paging and complete the switch to legacy 32-bit mode. */ - movl $rm_stack_end, %esp + LOCK_AND_LOAD_REALMODE_ESP lock_pa=1 movw $__KERNEL_DS, %dx movl $(CR0_STATE & ~X86_CR0_PG), %eax @@ -241,6 +257,7 @@ SYM_DATA_START(trampoline_header) SYM_DATA(tr_efer, .space 8) SYM_DATA(tr_cr4, .space 4) SYM_DATA(tr_flags, .space 4) + SYM_DATA(tr_lock, .space 4) SYM_DATA_END(trampoline_header) #include "trampoline_common.S" diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 9fd24846d094..9e9143085d19 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -10,6 +10,7 @@ #include <linux/pci.h> #include <linux/module.h> #include <linux/vgaarb.h> +#include <asm/fb.h> int fb_is_primary_device(struct fb_info *info) { diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 7d7ffb9c826a..863d0d6b3edc 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -16,6 +16,8 @@ #include <asm/setup.h> #include <asm/xen/hypercall.h> +#include "xen-ops.h" + static efi_char16_t vendor[100] __initdata; static efi_system_table_t efi_systab_xen __initdata = { diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index c1cd28e915a3..a6820ca940bf 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -161,13 +161,12 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) int rc = 0; /* - * This can happen if CPU was offlined earlier and - * offlining timed out in common_cpu_die(). + * If a CPU was offlined earlier and offlining timed out then the + * lock mechanism is still initialized. Uninit it unconditionally + * as it's safe to call even if already uninited. Interrupts and + * timer have already been handled in xen_cpu_dead_hvm(). */ - if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - } + xen_uninit_lock_cpu(cpu); if (cpu_acpi_id(cpu) != U32_MAX) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 093b78c8bbec..93b658248d01 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -68,6 +68,7 @@ #include <asm/reboot.h> #include <asm/hypervisor.h> #include <asm/mach_traps.h> +#include <asm/mtrr.h> #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/cpu.h> @@ -119,6 +120,54 @@ static int __init parse_xen_msr_safe(char *str) } early_param("xen_msr_safe", parse_xen_msr_safe); +/* Get MTRR settings from Xen and put them into mtrr_state. */ +static void __init xen_set_mtrr_data(void) +{ +#ifdef CONFIG_MTRR + struct xen_platform_op op = { + .cmd = XENPF_read_memtype, + .interface_version = XENPF_INTERFACE_VERSION, + }; + unsigned int reg; + unsigned long mask; + uint32_t eax, width; + static struct mtrr_var_range var[MTRR_MAX_VAR_RANGES] __initdata; + + /* Get physical address width (only 64-bit cpus supported). */ + width = 36; + eax = cpuid_eax(0x80000000); + if ((eax >> 16) == 0x8000 && eax >= 0x80000008) { + eax = cpuid_eax(0x80000008); + width = eax & 0xff; + } + + for (reg = 0; reg < MTRR_MAX_VAR_RANGES; reg++) { + op.u.read_memtype.reg = reg; + if (HYPERVISOR_platform_op(&op)) + break; + + /* + * Only called in dom0, which has all RAM PFNs mapped at + * RAM MFNs, and all PCI space etc. is identity mapped. + * This means we can treat MFN == PFN regarding MTRR settings. + */ + var[reg].base_lo = op.u.read_memtype.type; + var[reg].base_lo |= op.u.read_memtype.mfn << PAGE_SHIFT; + var[reg].base_hi = op.u.read_memtype.mfn >> (32 - PAGE_SHIFT); + mask = ~((op.u.read_memtype.nr_mfns << PAGE_SHIFT) - 1); + mask &= (1UL << width) - 1; + if (mask) + mask |= MTRR_PHYSMASK_V; + var[reg].mask_lo = mask; + var[reg].mask_hi = mask >> 32; + } + + /* Only overwrite MTRR state if any MTRR could be got from Xen. */ + if (reg) + mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE); +#endif +} + static void __init xen_pv_init_platform(void) { /* PV guests can't operate virtio devices without grants. */ @@ -135,6 +184,11 @@ static void __init xen_pv_init_platform(void) /* pvclock is in shared info area */ xen_init_time_ops(); + + if (xen_initial_domain()) + xen_set_mtrr_data(); + else + mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } static void __init xen_pv_guest_late_init(void) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index b3b8d289b9ab..e0a975165de7 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -86,6 +86,22 @@ #include "mmu.h" #include "debugfs.h" +/* + * Prototypes for functions called via PV_CALLEE_SAVE_REGS_THUNK() in order + * to avoid warnings with "-Wmissing-prototypes". + */ +pteval_t xen_pte_val(pte_t pte); +pgdval_t xen_pgd_val(pgd_t pgd); +pmdval_t xen_pmd_val(pmd_t pmd); +pudval_t xen_pud_val(pud_t pud); +p4dval_t xen_p4d_val(p4d_t p4d); +pte_t xen_make_pte(pteval_t pte); +pgd_t xen_make_pgd(pgdval_t pgd); +pmd_t xen_make_pmd(pmdval_t pmd); +pud_t xen_make_pud(pudval_t pud); +p4d_t xen_make_p4d(p4dval_t p4d); +pte_t xen_make_pte_init(pteval_t pte); + #ifdef CONFIG_X86_VSYSCALL_EMULATION /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c2be3efb2ba0..8b5cf7bb1f47 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -6,6 +6,7 @@ */ #include <linux/init.h> +#include <linux/iscsi_ibft.h> #include <linux/sched.h> #include <linux/kstrtox.h> #include <linux/mm.h> @@ -764,17 +765,26 @@ char * __init xen_memory_setup(void) BUG_ON(memmap.nr_entries == 0); xen_e820_table.nr_entries = memmap.nr_entries; - /* - * Xen won't allow a 1:1 mapping to be created to UNUSABLE - * regions, so if we're using the machine memory map leave the - * region as RAM as it is in the pseudo-physical map. - * - * UNUSABLE regions in domUs are not handled and will need - * a patch in the future. - */ - if (xen_initial_domain()) + if (xen_initial_domain()) { + /* + * Xen won't allow a 1:1 mapping to be created to UNUSABLE + * regions, so if we're using the machine memory map leave the + * region as RAM as it is in the pseudo-physical map. + * + * UNUSABLE regions in domUs are not handled and will need + * a patch in the future. + */ xen_ignore_unusable(); +#ifdef CONFIG_ISCSI_IBFT_FIND + /* Reserve 0.5 MiB to 1 MiB region so iBFT can be found */ + xen_e820_table.entries[xen_e820_table.nr_entries].addr = IBFT_START; + xen_e820_table.entries[xen_e820_table.nr_entries].size = IBFT_END - IBFT_START; + xen_e820_table.entries[xen_e820_table.nr_entries].type = E820_TYPE_RESERVED; + xen_e820_table.nr_entries++; +#endif + } + /* Make sure the Xen-supplied memory map is well-ordered. */ e820__update_table(&xen_e820_table); diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index 22fb982ff971..c20cbb14c82b 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -2,6 +2,10 @@ #ifndef _XEN_SMP_H #ifdef CONFIG_SMP + +void asm_cpu_bringup_and_idle(void); +asmlinkage void cpu_bringup_and_idle(void); + extern void xen_send_IPI_mask(const struct cpumask *mask, int vector); extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index b70afdff419c..ac95d1981cc0 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -55,18 +55,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) } #ifdef CONFIG_HOTPLUG_CPU -static void xen_hvm_cpu_die(unsigned int cpu) +static void xen_hvm_cleanup_dead_cpu(unsigned int cpu) { - if (common_cpu_die(cpu) == 0) { - if (xen_have_vector_callback) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); - } + if (xen_have_vector_callback) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); } } #else -static void xen_hvm_cpu_die(unsigned int cpu) +static void xen_hvm_cleanup_dead_cpu(unsigned int cpu) { BUG(); } @@ -77,7 +75,7 @@ void __init xen_hvm_smp_init(void) smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_cpus_done = xen_smp_cpus_done; - smp_ops.cpu_die = xen_hvm_cpu_die; + smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu; if (!xen_have_vector_callback) { #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index a9cf8c8fa074..d5ae5de2daa2 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -55,13 +55,13 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); -void asm_cpu_bringup_and_idle(void); static void cpu_bringup(void) { int cpu; cr4_init(); + cpuhp_ap_sync_alive(); cpu_init(); touch_softlockup_watchdog(); @@ -83,7 +83,7 @@ static void cpu_bringup(void) set_cpu_online(cpu, true); - cpu_set_state_online(cpu); /* Implies full memory barrier. */ + smp_mb(); /* We can take interrupts now: we're officially "up". */ local_irq_enable(); @@ -254,15 +254,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct desc_struct *gdt; unsigned long gdt_mfn; - /* used to tell cpu_init() that it can proceed with initialization */ - cpumask_set_cpu(cpu, cpu_callout_mask); if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (ctxt == NULL) { cpumask_clear_cpu(cpu, xen_cpu_initialized_map); - cpumask_clear_cpu(cpu, cpu_callout_mask); return -ENOMEM; } @@ -316,7 +313,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) return 0; } -static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) +static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle) { int rc; @@ -326,14 +323,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) xen_setup_runstate_info(cpu); - /* - * PV VCPUs are always successfully taken down (see 'while' loop - * in xen_cpu_die()), so -EBUSY is an error. - */ - rc = cpu_check_up_prepare(cpu); - if (rc) - return rc; - /* make sure interrupts start blocked */ per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; @@ -343,15 +332,20 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) xen_pmu_init(cpu); - rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); - BUG_ON(rc); - - while (cpu_report_state(cpu) != CPU_ONLINE) - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + /* + * Why is this a BUG? If the hypercall fails then everything can be + * rolled back, no? + */ + BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)); return 0; } +static void xen_pv_poll_sync_state(void) +{ + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); +} + #ifdef CONFIG_HOTPLUG_CPU static int xen_pv_cpu_disable(void) { @@ -367,18 +361,18 @@ static int xen_pv_cpu_disable(void) static void xen_pv_cpu_die(unsigned int cpu) { - while (HYPERVISOR_vcpu_op(VCPUOP_is_up, - xen_vcpu_nr(cpu), NULL)) { + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) { __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } +} - if (common_cpu_die(cpu) == 0) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); - xen_pmu_finish(cpu); - } +static void xen_pv_cleanup_dead_cpu(unsigned int cpu) +{ + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + xen_pmu_finish(cpu); } static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ @@ -400,6 +394,11 @@ static void xen_pv_cpu_die(unsigned int cpu) BUG(); } +static void xen_pv_cleanup_dead_cpu(unsigned int cpu) +{ + BUG(); +} + static void __noreturn xen_pv_play_dead(void) { BUG(); @@ -438,8 +437,10 @@ static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_cpus = xen_pv_smp_prepare_cpus, .smp_cpus_done = xen_smp_cpus_done, - .cpu_up = xen_pv_cpu_up, + .kick_ap_alive = xen_pv_kick_ap, .cpu_die = xen_pv_cpu_die, + .cleanup_dead_cpu = xen_pv_cleanup_dead_cpu, + .poll_sync_state = xen_pv_poll_sync_state, .cpu_disable = xen_pv_cpu_disable, .play_dead = xen_pv_play_dead, diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index b74ac2562cfb..52fa5609b7f6 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -66,11 +66,10 @@ static noinstr u64 xen_sched_clock(void) struct pvclock_vcpu_time_info *src; u64 ret; - preempt_disable_notrace(); src = &__this_cpu_read(xen_vcpu)->time; ret = pvclock_clocksource_read_nowd(src); ret -= xen_sched_clock_offset; - preempt_enable_notrace(); + return ret; } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a10903785a33..408a2aa66c69 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -72,8 +72,6 @@ void xen_restore_time_memory_area(void); void xen_init_time_ops(void); void xen_hvm_init_time_ops(void); -irqreturn_t xen_debug_interrupt(int irq, void *dev_id); - bool xen_vcpu_stolen(int vcpu); void xen_vcpu_setup(int cpu); @@ -148,9 +146,12 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), void xen_pin_vcpu(int cpu); void xen_emergency_restart(void); +void xen_force_evtchn_callback(void); + #ifdef CONFIG_XEN_PV void xen_pv_pre_suspend(void); void xen_pv_post_suspend(int suspend_cancelled); +void xen_start_kernel(struct start_info *si); #else static inline void xen_pv_pre_suspend(void) {} static inline void xen_pv_post_suspend(int suspend_cancelled) {} diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 3c6e5471f025..c1bcfc2a8581 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -16,7 +16,6 @@ config XTENSA select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS @@ -35,6 +34,7 @@ config XTENSA select HAVE_ARCH_KCSAN select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK + select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS @@ -203,6 +203,18 @@ config XTENSA_UNALIGNED_USER Say Y here to enable unaligned memory access in user space. +config XTENSA_LOAD_STORE + bool "Load/store exception handler for memory only readable with l32" + help + The Xtensa architecture only allows reading memory attached to its + instruction bus with l32r and l32i instructions, all other + instructions raise an exception with the LoadStoreErrorCause code. + This makes it hard to use some configurations, e.g. store string + literals in FLASH memory attached to the instruction bus. + + Say Y here to enable exception handler that allows transparent + byte and 2-byte access to memory attached to instruction bus. + config HAVE_SMP bool "System Supports SMP (MX)" depends on XTENSA_VARIANT_CUSTOM diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug index 83cc8d12fa0e..e84172a7763c 100644 --- a/arch/xtensa/Kconfig.debug +++ b/arch/xtensa/Kconfig.debug @@ -38,3 +38,11 @@ config PRINT_STACK_DEPTH help This option allows you to set the stack depth that the kernel prints in stack traces. + +config PRINT_USER_CODE_ON_UNHANDLED_EXCEPTION + bool "Dump user code around unhandled exception address" + help + Enable this option to display user code around PC of the unhandled + exception (starting at address aligned on 16 byte boundary). + This may simplify finding faulting code in the absence of other + debug facilities. diff --git a/arch/xtensa/boot/boot-redboot/Makefile b/arch/xtensa/boot/boot-redboot/Makefile index 1d1d46215b1c..c0eef3f6f32d 100644 --- a/arch/xtensa/boot/boot-redboot/Makefile +++ b/arch/xtensa/boot/boot-redboot/Makefile @@ -6,16 +6,12 @@ OBJCOPY_ARGS := -O $(if $(CONFIG_CPU_BIG_ENDIAN),elf32-xtensa-be,elf32-xtensa-le) -LD_ARGS = -T $(srctree)/$(obj)/boot.ld - boot-y := bootstrap.o targets += $(boot-y) OBJS := $(addprefix $(obj)/,$(boot-y)) LIBS := arch/xtensa/boot/lib/lib.a arch/xtensa/lib/lib.a -LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) - $(obj)/zImage.o: $(obj)/../vmlinux.bin.gz $(OBJS) $(Q)$(OBJCOPY) $(OBJCOPY_ARGS) -R .comment \ --add-section image=$< \ @@ -23,7 +19,10 @@ $(obj)/zImage.o: $(obj)/../vmlinux.bin.gz $(OBJS) $(OBJS) $@ $(obj)/zImage.elf: $(obj)/zImage.o $(LIBS) - $(Q)$(LD) $(LD_ARGS) -o $@ $^ -L/xtensa-elf/lib $(LIBGCC) + $(Q)$(LD) $(KBUILD_LDFLAGS) \ + -T $(srctree)/$(obj)/boot.ld \ + --build-id=none \ + -o $@ $^ $(obj)/../zImage.redboot: $(obj)/zImage.elf $(Q)$(OBJCOPY) -S -O binary $< $@ diff --git a/arch/xtensa/include/asm/asm-prototypes.h b/arch/xtensa/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..b0da61812b85 --- /dev/null +++ b/arch/xtensa/include/asm/asm-prototypes.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_PROTOTYPES_H +#define __ASM_PROTOTYPES_H + +#include <asm/cacheflush.h> +#include <asm/checksum.h> +#include <asm/ftrace.h> +#include <asm/page.h> +#include <asm/string.h> +#include <asm/uaccess.h> + +#include <asm-generic/asm-prototypes.h> + +/* + * gcc internal math functions + */ +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __bswapdi2(long long); +int __bswapsi2(int); +long long __lshrdi3(long long, int); +int __divsi3(int, int); +int __modsi3(int, int); +int __mulsi3(int, int); +unsigned int __udivsi3(unsigned int, unsigned int); +unsigned int __umodsi3(unsigned int, unsigned int); +unsigned long long __umulsidi3(unsigned int, unsigned int); + +#endif /* __ASM_PROTOTYPES_H */ diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index e3474ca411ff..01bf7d9dbb19 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_ASMMACRO_H #define _XTENSA_ASMMACRO_H +#include <asm-generic/export.h> #include <asm/core.h> /* diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 52da614f953c..7308b7f777d7 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -245,6 +245,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -252,12 +257,13 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #endif /* _XTENSA_ATOMIC_H */ diff --git a/arch/xtensa/include/asm/bugs.h b/arch/xtensa/include/asm/bugs.h deleted file mode 100644 index 69b29d198249..000000000000 --- a/arch/xtensa/include/asm/bugs.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * include/asm-xtensa/bugs.h - * - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Xtensa processors don't have any bugs. :) - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file "COPYING" in the main directory of - * this archive for more details. - */ - -#ifndef _XTENSA_BUGS_H -#define _XTENSA_BUGS_H - -static void check_bugs(void) { } - -#endif /* _XTENSA_BUGS_H */ diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h index f856d2bcb9f3..0e1bb6f019d6 100644 --- a/arch/xtensa/include/asm/core.h +++ b/arch/xtensa/include/asm/core.h @@ -26,6 +26,14 @@ #define XCHAL_SPANNING_WAY 0 #endif +#ifndef XCHAL_HAVE_TRAX +#define XCHAL_HAVE_TRAX 0 +#endif + +#ifndef XCHAL_NUM_PERF_COUNTERS +#define XCHAL_NUM_PERF_COUNTERS 0 +#endif + #if XCHAL_HAVE_WINDOWED #if defined(CONFIG_USER_ABI_DEFAULT) || defined(CONFIG_USER_ABI_CALL0_PROBE) /* Whether windowed ABI is supported in userspace. */ diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h index 6c6d9a9f185f..0ea4f84cd558 100644 --- a/arch/xtensa/include/asm/ftrace.h +++ b/arch/xtensa/include/asm/ftrace.h @@ -13,17 +13,8 @@ #include <asm/processor.h> #ifndef __ASSEMBLY__ -#define ftrace_return_address0 ({ unsigned long a0, a1; \ - __asm__ __volatile__ ( \ - "mov %0, a0\n" \ - "mov %1, a1\n" \ - : "=r"(a0), "=r"(a1)); \ - MAKE_PC_FROM_RA(a0, a1); }) - -#ifdef CONFIG_FRAME_POINTER extern unsigned long return_address(unsigned level); #define ftrace_return_address(n) return_address(n) -#endif #endif /* __ASSEMBLY__ */ #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h index 354ca942de40..94f13fabf7cd 100644 --- a/arch/xtensa/include/asm/platform.h +++ b/arch/xtensa/include/asm/platform.h @@ -28,31 +28,11 @@ extern void platform_init(bp_tag_t*); extern void platform_setup (char **); /* - * platform_restart is called to restart the system. - */ -extern void platform_restart (void); - -/* - * platform_halt is called to stop the system and halt. - */ -extern void platform_halt (void); - -/* - * platform_power_off is called to stop the system and power it off. - */ -extern void platform_power_off (void); - -/* * platform_idle is called from the idle function. */ extern void platform_idle (void); /* - * platform_heartbeat is called every HZ - */ -extern void platform_heartbeat (void); - -/* * platform_calibrate_ccount calibrates cpu clock freq (CONFIG_XTENSA_CALIBRATE) */ extern void platform_calibrate_ccount (void); diff --git a/arch/xtensa/include/asm/string.h b/arch/xtensa/include/asm/string.h index 89b51a0c752f..ffce43513fa2 100644 --- a/arch/xtensa/include/asm/string.h +++ b/arch/xtensa/include/asm/string.h @@ -118,9 +118,6 @@ extern void *__memcpy(void *__to, __const__ void *__from, size_t __n); extern void *memmove(void *__dest, __const__ void *__src, size_t __n); extern void *__memmove(void *__dest, __const__ void *__src, size_t __n); -/* Don't build bcopy at all ... */ -#define __HAVE_ARCH_BCOPY - #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) /* diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 6f74ccc0c7ea..212c3b9ff407 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -47,6 +47,7 @@ __init trap_set_handler(int cause, xtensa_exception_handler *handler); asmlinkage void fast_illegal_instruction_user(void); asmlinkage void fast_syscall_user(void); asmlinkage void fast_alloca(void); +asmlinkage void fast_load_store(void); asmlinkage void fast_unaligned(void); asmlinkage void fast_second_level_miss(void); asmlinkage void fast_store_prohibited(void); @@ -64,8 +65,14 @@ void do_unhandled(struct pt_regs *regs); static inline void __init early_trap_init(void) { static struct exc_table init_exc_table __initdata = { +#ifdef CONFIG_XTENSA_LOAD_STORE + .fast_kernel_handler[EXCCAUSE_LOAD_STORE_ERROR] = + fast_load_store, +#endif +#ifdef CONFIG_MMU .fast_kernel_handler[EXCCAUSE_DTLB_MISS] = fast_second_level_miss, +#endif }; xtensa_set_sr(&init_exc_table, excsave1); } diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S index d062c732ef18..20d6b4961001 100644 --- a/arch/xtensa/kernel/align.S +++ b/arch/xtensa/kernel/align.S @@ -22,7 +22,17 @@ #include <asm/asmmacro.h> #include <asm/processor.h> -#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION +#if XCHAL_UNALIGNED_LOAD_EXCEPTION || defined CONFIG_XTENSA_LOAD_STORE +#define LOAD_EXCEPTION_HANDLER +#endif + +#if XCHAL_UNALIGNED_STORE_EXCEPTION || defined LOAD_EXCEPTION_HANDLER +#define ANY_EXCEPTION_HANDLER +#endif + +#if XCHAL_HAVE_WINDOWED +#define UNALIGNED_USER_EXCEPTION +#endif /* First-level exception handler for unaligned exceptions. * @@ -58,10 +68,6 @@ * BE shift left / mask 0 0 X X */ -#if XCHAL_HAVE_WINDOWED -#define UNALIGNED_USER_EXCEPTION -#endif - #if XCHAL_HAVE_BE #define HWORD_START 16 @@ -103,7 +109,7 @@ * * 23 0 * ----------------------------- - * res 0000 0010 + * L8UI xxxx xxxx 0000 ssss tttt 0010 * L16UI xxxx xxxx 0001 ssss tttt 0010 * L32I xxxx xxxx 0010 ssss tttt 0010 * XXX 0011 ssss tttt 0010 @@ -128,9 +134,11 @@ #define OP0_L32I_N 0x8 /* load immediate narrow */ #define OP0_S32I_N 0x9 /* store immediate narrow */ +#define OP0_LSAI 0x2 /* load/store */ #define OP1_SI_MASK 0x4 /* OP1 bit set for stores */ #define OP1_SI_BIT 2 /* OP1 bit number for stores */ +#define OP1_L8UI 0x0 #define OP1_L32I 0x2 #define OP1_L16UI 0x1 #define OP1_L16SI 0x9 @@ -155,60 +163,74 @@ */ .literal_position -ENTRY(fast_unaligned) +#ifdef CONFIG_XTENSA_LOAD_STORE +ENTRY(fast_load_store) - /* Note: We don't expect the address to be aligned on a word - * boundary. After all, the processor generated that exception - * and it would be a hardware fault. - */ + call0 .Lsave_and_load_instruction - /* Save some working register */ + /* Analyze the instruction (load or store?). */ - s32i a4, a2, PT_AREG4 - s32i a5, a2, PT_AREG5 - s32i a6, a2, PT_AREG6 - s32i a7, a2, PT_AREG7 - s32i a8, a2, PT_AREG8 + extui a0, a4, INSN_OP0, 4 # get insn.op0 nibble - rsr a0, depc - s32i a0, a2, PT_AREG2 - s32i a3, a2, PT_AREG3 +#if XCHAL_HAVE_DENSITY + _beqi a0, OP0_L32I_N, 1f # L32I.N, jump +#endif + bnei a0, OP0_LSAI, .Linvalid_instruction + /* 'store indicator bit' set, jump */ + bbsi.l a4, OP1_SI_BIT + INSN_OP1, .Linvalid_instruction - rsr a3, excsave1 - movi a4, fast_unaligned_fixup - s32i a4, a3, EXC_TABLE_FIXUP +1: + movi a3, ~3 + and a3, a3, a8 # align memory address - /* Keep value of SAR in a0 */ + __ssa8 a8 - rsr a0, sar - rsr a8, excvaddr # load unaligned memory address +#ifdef CONFIG_MMU + /* l32e can't be used here even when it's available. */ + /* TODO access_ok(a3) could be used here */ + j .Linvalid_instruction +#endif + l32i a5, a3, 0 + l32i a6, a3, 4 + __src_b a3, a5, a6 # a3 has the data word - /* Now, identify one of the following load/store instructions. - * - * The only possible danger of a double exception on the - * following l32i instructions is kernel code in vmalloc - * memory. The processor was just executing at the EPC_1 - * address, and indeed, already fetched the instruction. That - * guarantees a TLB mapping, which hasn't been replaced by - * this unaligned exception handler that uses only static TLB - * mappings. However, high-level interrupt handlers might - * modify TLB entries, so for the generic case, we register a - * TABLE_FIXUP handler here, too. - */ +#if XCHAL_HAVE_DENSITY + addi a7, a7, 2 # increment PC (assume 16-bit insn) + _beqi a0, OP0_L32I_N, .Lload_w# l32i.n: jump + addi a7, a7, 1 +#else + addi a7, a7, 3 +#endif - /* a3...a6 saved on stack, a2 = SP */ + extui a5, a4, INSN_OP1, 4 + _beqi a5, OP1_L32I, .Lload_w + bnei a5, OP1_L8UI, .Lload16 + extui a3, a3, 0, 8 + j .Lload_w - /* Extract the instruction that caused the unaligned access. */ +ENDPROC(fast_load_store) +#endif - rsr a7, epc1 # load exception address - movi a3, ~3 - and a3, a3, a7 # mask lower bits +/* + * Entry condition: + * + * a0: trashed, original value saved on stack (PT_AREG0) + * a1: a1 + * a2: new stack pointer, original in DEPC + * a3: a3 + * depc: a2, original value saved on stack (PT_DEPC) + * excsave_1: dispatch table + * + * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC + * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception + */ - l32i a4, a3, 0 # load 2 words - l32i a5, a3, 4 +#ifdef ANY_EXCEPTION_HANDLER +ENTRY(fast_unaligned) - __ssa8 a7 - __src_b a4, a4, a5 # a4 has the instruction +#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION + + call0 .Lsave_and_load_instruction /* Analyze the instruction (load or store?). */ @@ -222,12 +244,17 @@ ENTRY(fast_unaligned) /* 'store indicator bit' not set, jump */ _bbci.l a4, OP1_SI_BIT + INSN_OP1, .Lload +#endif +#if XCHAL_UNALIGNED_STORE_EXCEPTION + /* Store: Jump to table entry to get the value in the source register.*/ .Lstore:movi a5, .Lstore_table # table extui a6, a4, INSN_T, 4 # get source register addx8 a5, a6, a5 jx a5 # jump into table +#endif +#if XCHAL_UNALIGNED_LOAD_EXCEPTION /* Load: Load memory address. */ @@ -249,7 +276,7 @@ ENTRY(fast_unaligned) addi a7, a7, 2 # increment PC (assume 16-bit insn) extui a5, a4, INSN_OP0, 4 - _beqi a5, OP0_L32I_N, 1f # l32i.n: jump + _beqi a5, OP0_L32I_N, .Lload_w# l32i.n: jump addi a7, a7, 1 #else @@ -257,21 +284,26 @@ ENTRY(fast_unaligned) #endif extui a5, a4, INSN_OP1, 4 - _beqi a5, OP1_L32I, 1f # l32i: jump - + _beqi a5, OP1_L32I, .Lload_w # l32i: jump +#endif +#ifdef LOAD_EXCEPTION_HANDLER +.Lload16: extui a3, a3, 0, 16 # extract lower 16 bits - _beqi a5, OP1_L16UI, 1f + _beqi a5, OP1_L16UI, .Lload_w addi a5, a5, -OP1_L16SI - _bnez a5, .Linvalid_instruction_load + _bnez a5, .Linvalid_instruction /* sign extend value */ - +#if XCHAL_HAVE_SEXT + sext a3, a3, 15 +#else slli a3, a3, 16 srai a3, a3, 16 +#endif /* Set target register. */ -1: +.Lload_w: extui a4, a4, INSN_T, 4 # extract target register movi a5, .Lload_table addx8 a4, a4, a5 @@ -295,30 +327,32 @@ ENTRY(fast_unaligned) mov a13, a3 ; _j .Lexit; .align 8 mov a14, a3 ; _j .Lexit; .align 8 mov a15, a3 ; _j .Lexit; .align 8 - +#endif +#if XCHAL_UNALIGNED_STORE_EXCEPTION .Lstore_table: - l32i a3, a2, PT_AREG0; _j 1f; .align 8 - mov a3, a1; _j 1f; .align 8 # fishy?? - l32i a3, a2, PT_AREG2; _j 1f; .align 8 - l32i a3, a2, PT_AREG3; _j 1f; .align 8 - l32i a3, a2, PT_AREG4; _j 1f; .align 8 - l32i a3, a2, PT_AREG5; _j 1f; .align 8 - l32i a3, a2, PT_AREG6; _j 1f; .align 8 - l32i a3, a2, PT_AREG7; _j 1f; .align 8 - l32i a3, a2, PT_AREG8; _j 1f; .align 8 - mov a3, a9 ; _j 1f; .align 8 - mov a3, a10 ; _j 1f; .align 8 - mov a3, a11 ; _j 1f; .align 8 - mov a3, a12 ; _j 1f; .align 8 - mov a3, a13 ; _j 1f; .align 8 - mov a3, a14 ; _j 1f; .align 8 - mov a3, a15 ; _j 1f; .align 8 + l32i a3, a2, PT_AREG0; _j .Lstore_w; .align 8 + mov a3, a1; _j .Lstore_w; .align 8 # fishy?? + l32i a3, a2, PT_AREG2; _j .Lstore_w; .align 8 + l32i a3, a2, PT_AREG3; _j .Lstore_w; .align 8 + l32i a3, a2, PT_AREG4; _j .Lstore_w; .align 8 + l32i a3, a2, PT_AREG5; _j .Lstore_w; .align 8 + l32i a3, a2, PT_AREG6; _j .Lstore_w; .align 8 + l32i a3, a2, PT_AREG7; _j .Lstore_w; .align 8 + l32i a3, a2, PT_AREG8; _j .Lstore_w; .align 8 + mov a3, a9 ; _j .Lstore_w; .align 8 + mov a3, a10 ; _j .Lstore_w; .align 8 + mov a3, a11 ; _j .Lstore_w; .align 8 + mov a3, a12 ; _j .Lstore_w; .align 8 + mov a3, a13 ; _j .Lstore_w; .align 8 + mov a3, a14 ; _j .Lstore_w; .align 8 + mov a3, a15 ; _j .Lstore_w; .align 8 +#endif +#ifdef ANY_EXCEPTION_HANDLER /* We cannot handle this exception. */ .extern _kernel_exception -.Linvalid_instruction_load: -.Linvalid_instruction_store: +.Linvalid_instruction: movi a4, 0 rsr a3, excsave1 @@ -326,6 +360,7 @@ ENTRY(fast_unaligned) /* Restore a4...a8 and SAR, set SP, and jump to default exception. */ + l32i a0, a2, PT_SAR l32i a8, a2, PT_AREG8 l32i a7, a2, PT_AREG7 l32i a6, a2, PT_AREG6 @@ -342,9 +377,11 @@ ENTRY(fast_unaligned) 2: movi a0, _user_exception jx a0 +#endif +#if XCHAL_UNALIGNED_STORE_EXCEPTION -1: # a7: instruction pointer, a4: instruction, a3: value - + # a7: instruction pointer, a4: instruction, a3: value +.Lstore_w: movi a6, 0 # mask: ffffffff:00000000 #if XCHAL_HAVE_DENSITY @@ -361,7 +398,7 @@ ENTRY(fast_unaligned) extui a5, a4, INSN_OP1, 4 # extract OP1 _beqi a5, OP1_S32I, 1f # jump if 32 bit store - _bnei a5, OP1_S16I, .Linvalid_instruction_store + _bnei a5, OP1_S16I, .Linvalid_instruction movi a5, -1 __extl a3, a3 # get 16-bit value @@ -406,7 +443,8 @@ ENTRY(fast_unaligned) #else s32i a6, a4, 4 #endif - +#endif +#ifdef ANY_EXCEPTION_HANDLER .Lexit: #if XCHAL_HAVE_LOOPS rsr a4, lend # check if we reached LEND @@ -434,6 +472,7 @@ ENTRY(fast_unaligned) /* Restore working register */ + l32i a0, a2, PT_SAR l32i a8, a2, PT_AREG8 l32i a7, a2, PT_AREG7 l32i a6, a2, PT_AREG6 @@ -448,6 +487,59 @@ ENTRY(fast_unaligned) l32i a2, a2, PT_AREG2 rfe + .align 4 +.Lsave_and_load_instruction: + + /* Save some working register */ + + s32i a3, a2, PT_AREG3 + s32i a4, a2, PT_AREG4 + s32i a5, a2, PT_AREG5 + s32i a6, a2, PT_AREG6 + s32i a7, a2, PT_AREG7 + s32i a8, a2, PT_AREG8 + + rsr a4, depc + s32i a4, a2, PT_AREG2 + + rsr a5, sar + s32i a5, a2, PT_SAR + + rsr a3, excsave1 + movi a4, fast_unaligned_fixup + s32i a4, a3, EXC_TABLE_FIXUP + + rsr a8, excvaddr # load unaligned memory address + + /* Now, identify one of the following load/store instructions. + * + * The only possible danger of a double exception on the + * following l32i instructions is kernel code in vmalloc + * memory. The processor was just executing at the EPC_1 + * address, and indeed, already fetched the instruction. That + * guarantees a TLB mapping, which hasn't been replaced by + * this unaligned exception handler that uses only static TLB + * mappings. However, high-level interrupt handlers might + * modify TLB entries, so for the generic case, we register a + * TABLE_FIXUP handler here, too. + */ + + /* a3...a6 saved on stack, a2 = SP */ + + /* Extract the instruction that caused the unaligned access. */ + + rsr a7, epc1 # load exception address + movi a3, ~3 + and a3, a3, a7 # mask lower bits + + l32i a4, a3, 0 # load 2 words + l32i a5, a3, 4 + + __ssa8 a7 + __src_b a4, a4, a5 # a4 has the instruction + + ret +#endif ENDPROC(fast_unaligned) ENTRY(fast_unaligned_fixup) @@ -459,10 +551,11 @@ ENTRY(fast_unaligned_fixup) l32i a7, a2, PT_AREG7 l32i a6, a2, PT_AREG6 l32i a5, a2, PT_AREG5 - l32i a4, a2, PT_AREG4 + l32i a4, a2, PT_SAR l32i a0, a2, PT_AREG2 - xsr a0, depc # restore depc and a0 - wsr a0, sar + wsr a4, sar + wsr a0, depc # restore depc and a0 + l32i a4, a2, PT_AREG4 rsr a0, exccause s32i a0, a2, PT_DEPC # mark as a regular exception @@ -483,5 +576,4 @@ ENTRY(fast_unaligned_fixup) jx a0 ENDPROC(fast_unaligned_fixup) - -#endif /* XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION */ +#endif diff --git a/arch/xtensa/kernel/mcount.S b/arch/xtensa/kernel/mcount.S index 51daaf4e0b82..309b3298258f 100644 --- a/arch/xtensa/kernel/mcount.S +++ b/arch/xtensa/kernel/mcount.S @@ -78,6 +78,7 @@ ENTRY(_mcount) #error Unsupported Xtensa ABI #endif ENDPROC(_mcount) +EXPORT_SYMBOL(_mcount) ENTRY(ftrace_stub) abi_entry_default diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c index ac1e0e566995..926b8bf0f14c 100644 --- a/arch/xtensa/kernel/platform.c +++ b/arch/xtensa/kernel/platform.c @@ -17,27 +17,28 @@ #include <asm/platform.h> #include <asm/timex.h> -#define _F(r,f,a,b) \ - r __platform_##f a b; \ - r platform_##f a __attribute__((weak, alias("__platform_"#f))) - /* * Default functions that are used if no platform specific function is defined. - * (Please, refer to include/asm-xtensa/platform.h for more information) + * (Please, refer to arch/xtensa/include/asm/platform.h for more information) */ -_F(void, init, (bp_tag_t *first), { }); -_F(void, setup, (char** cmd), { }); -_F(void, restart, (void), { while(1); }); -_F(void, halt, (void), { while(1); }); -_F(void, power_off, (void), { while(1); }); -_F(void, idle, (void), { __asm__ __volatile__ ("waiti 0" ::: "memory"); }); -_F(void, heartbeat, (void), { }); +void __weak __init platform_init(bp_tag_t *first) +{ +} + +void __weak __init platform_setup(char **cmd) +{ +} + +void __weak platform_idle(void) +{ + __asm__ __volatile__ ("waiti 0" ::: "memory"); +} #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT -_F(void, calibrate_ccount, (void), +void __weak platform_calibrate_ccount(void) { pr_err("ERROR: Cannot calibrate cpu frequency! Assuming 10MHz.\n"); ccount_freq = 10 * 1000000UL; -}); +} #endif diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 9191738f9941..aba3ff4e60d8 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -22,6 +22,7 @@ #include <linux/screen_info.h> #include <linux/kernel.h> #include <linux/percpu.h> +#include <linux/reboot.h> #include <linux/cpu.h> #include <linux/of.h> #include <linux/of_fdt.h> @@ -46,6 +47,7 @@ #include <asm/smp.h> #include <asm/sysmem.h> #include <asm/timex.h> +#include <asm/traps.h> #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) struct screen_info screen_info = { @@ -241,6 +243,12 @@ void __init early_init_devtree(void *params) void __init init_arch(bp_tag_t *bp_start) { + /* Initialize basic exception handling if configuration may need it */ + + if (IS_ENABLED(CONFIG_KASAN) || + IS_ENABLED(CONFIG_XTENSA_LOAD_STORE)) + early_trap_init(); + /* Initialize MMU. */ init_mmu(); @@ -522,19 +530,30 @@ void cpu_reset(void) void machine_restart(char * cmd) { - platform_restart(); + local_irq_disable(); + smp_send_stop(); + do_kernel_restart(cmd); + pr_err("Reboot failed -- System halted\n"); + while (1) + cpu_relax(); } void machine_halt(void) { - platform_halt(); - while (1); + local_irq_disable(); + smp_send_stop(); + do_kernel_power_off(); + while (1) + cpu_relax(); } void machine_power_off(void) { - platform_power_off(); - while (1); + local_irq_disable(); + smp_send_stop(); + do_kernel_power_off(); + while (1) + cpu_relax(); } #ifdef CONFIG_PROC_FS @@ -574,6 +593,12 @@ c_show(struct seq_file *f, void *slot) # if XCHAL_HAVE_OCD "ocd " # endif +#if XCHAL_HAVE_TRAX + "trax " +#endif +#if XCHAL_NUM_PERF_COUNTERS + "perf " +#endif #endif #if XCHAL_HAVE_DENSITY "density " @@ -623,11 +648,13 @@ c_show(struct seq_file *f, void *slot) seq_printf(f,"physical aregs\t: %d\n" "misc regs\t: %d\n" "ibreak\t\t: %d\n" - "dbreak\t\t: %d\n", + "dbreak\t\t: %d\n" + "perf counters\t: %d\n", XCHAL_NUM_AREGS, XCHAL_NUM_MISC_REGS, XCHAL_NUM_IBREAK, - XCHAL_NUM_DBREAK); + XCHAL_NUM_DBREAK, + XCHAL_NUM_PERF_COUNTERS); /* Interrupt. */ diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 876d5df157ed..5c01d7e70d90 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -343,7 +343,19 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct rt_sigframe *frame; int err = 0, sig = ksig->sig; unsigned long sp, ra, tp, ps; + unsigned long handler = (unsigned long)ksig->ka.sa.sa_handler; + unsigned long handler_fdpic_GOT = 0; unsigned int base; + bool fdpic = IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) && + (current->personality & FDPIC_FUNCPTRS); + + if (fdpic) { + unsigned long __user *fdpic_func_desc = + (unsigned long __user *)handler; + if (__get_user(handler, &fdpic_func_desc[0]) || + __get_user(handler_fdpic_GOT, &fdpic_func_desc[1])) + return -EFAULT; + } sp = regs->areg[1]; @@ -373,20 +385,26 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (ksig->ka.sa.sa_flags & SA_RESTORER) { - ra = (unsigned long)ksig->ka.sa.sa_restorer; + if (fdpic) { + unsigned long __user *fdpic_func_desc = + (unsigned long __user *)ksig->ka.sa.sa_restorer; + + err |= __get_user(ra, fdpic_func_desc); + } else { + ra = (unsigned long)ksig->ka.sa.sa_restorer; + } } else { /* Create sys_rt_sigreturn syscall in stack frame */ err |= gen_return_code(frame->retcode); - - if (err) { - return -EFAULT; - } ra = (unsigned long) frame->retcode; } - /* + if (err) + return -EFAULT; + + /* * Create signal handler execution context. * Return context not modified until this point. */ @@ -394,8 +412,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, /* Set up registers for signal handler; preserve the threadptr */ tp = regs->threadptr; ps = regs->ps; - start_thread(regs, (unsigned long) ksig->ka.sa.sa_handler, - (unsigned long) frame); + start_thread(regs, handler, (unsigned long)frame); /* Set up a stack frame for a call4 if userspace uses windowed ABI */ if (ps & PS_WOE_MASK) { @@ -413,6 +430,8 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, regs->areg[base + 4] = (unsigned long) &frame->uc; regs->threadptr = tp; regs->ps = ps; + if (fdpic) + regs->areg[base + 11] = handler_fdpic_GOT; pr_debug("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08lx\n", current->comm, current->pid, sig, frame, regs->pc); diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index 7f7755cd28f0..f643ea5e36da 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -237,8 +237,6 @@ EXPORT_SYMBOL_GPL(save_stack_trace); #endif -#ifdef CONFIG_FRAME_POINTER - struct return_addr_data { unsigned long addr; unsigned skip; @@ -271,5 +269,3 @@ unsigned long return_address(unsigned level) return r.addr; } EXPORT_SYMBOL(return_address); - -#endif diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 16b8a6273772..1c3dfea843ec 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -121,10 +121,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) set_linux_timer(get_linux_timer()); evt->event_handler(evt); - - /* Allow platform to do something useful (Wdog). */ - platform_heartbeat(); - return IRQ_HANDLED; } diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index f0a7d1c2641e..17eb180eff7c 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -54,9 +54,10 @@ static void do_interrupt(struct pt_regs *regs); #if XTENSA_FAKE_NMI static void do_nmi(struct pt_regs *regs); #endif -#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION -static void do_unaligned_user(struct pt_regs *regs); +#ifdef CONFIG_XTENSA_LOAD_STORE +static void do_load_store(struct pt_regs *regs); #endif +static void do_unaligned_user(struct pt_regs *regs); static void do_multihit(struct pt_regs *regs); #if XTENSA_HAVE_COPROCESSORS static void do_coprocessor(struct pt_regs *regs); @@ -91,7 +92,10 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = { { EXCCAUSE_SYSTEM_CALL, USER, fast_syscall_user }, { EXCCAUSE_SYSTEM_CALL, 0, system_call }, /* EXCCAUSE_INSTRUCTION_FETCH unhandled */ -/* EXCCAUSE_LOAD_STORE_ERROR unhandled*/ +#ifdef CONFIG_XTENSA_LOAD_STORE +{ EXCCAUSE_LOAD_STORE_ERROR, USER|KRNL, fast_load_store }, +{ EXCCAUSE_LOAD_STORE_ERROR, 0, do_load_store }, +#endif { EXCCAUSE_LEVEL1_INTERRUPT, 0, do_interrupt }, #ifdef SUPPORT_WINDOWED { EXCCAUSE_ALLOCA, USER|KRNL, fast_alloca }, @@ -102,9 +106,9 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = { #ifdef CONFIG_XTENSA_UNALIGNED_USER { EXCCAUSE_UNALIGNED, USER, fast_unaligned }, #endif -{ EXCCAUSE_UNALIGNED, 0, do_unaligned_user }, { EXCCAUSE_UNALIGNED, KRNL, fast_unaligned }, #endif +{ EXCCAUSE_UNALIGNED, 0, do_unaligned_user }, #ifdef CONFIG_MMU { EXCCAUSE_ITLB_MISS, 0, do_page_fault }, { EXCCAUSE_ITLB_MISS, USER|KRNL, fast_second_level_miss}, @@ -171,6 +175,23 @@ __die_if_kernel(const char *str, struct pt_regs *regs, long err) die(str, regs, err); } +#ifdef CONFIG_PRINT_USER_CODE_ON_UNHANDLED_EXCEPTION +static inline void dump_user_code(struct pt_regs *regs) +{ + char buf[32]; + + if (copy_from_user(buf, (void __user *)(regs->pc & -16), sizeof(buf)) == 0) { + print_hex_dump(KERN_INFO, " ", DUMP_PREFIX_NONE, + 32, 1, buf, sizeof(buf), false); + + } +} +#else +static inline void dump_user_code(struct pt_regs *regs) +{ +} +#endif + /* * Unhandled Exceptions. Kill user task or panic if in kernel space. */ @@ -186,6 +207,7 @@ void do_unhandled(struct pt_regs *regs) "\tEXCCAUSE is %ld\n", current->comm, task_pid_nr(current), regs->pc, regs->exccause); + dump_user_code(regs); force_sig(SIGILL); } @@ -349,6 +371,19 @@ static void do_div0(struct pt_regs *regs) force_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)regs->pc); } +#ifdef CONFIG_XTENSA_LOAD_STORE +static void do_load_store(struct pt_regs *regs) +{ + __die_if_kernel("Unhandled load/store exception in kernel", + regs, SIGKILL); + + pr_info_ratelimited("Load/store error to %08lx in '%s' (pid = %d, pc = %#010lx)\n", + regs->excvaddr, current->comm, + task_pid_nr(current), regs->pc); + force_sig_fault(SIGBUS, BUS_ADRERR, (void *)regs->excvaddr); +} +#endif + /* * Handle unaligned memory accesses from user space. Kill task. * @@ -356,7 +391,6 @@ static void do_div0(struct pt_regs *regs) * accesses causes from user space. */ -#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION static void do_unaligned_user(struct pt_regs *regs) { __die_if_kernel("Unhandled unaligned exception in kernel", @@ -368,7 +402,6 @@ static void do_unaligned_user(struct pt_regs *regs) task_pid_nr(current), regs->pc); force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr); } -#endif #if XTENSA_HAVE_COPROCESSORS static void do_coprocessor(struct pt_regs *regs) @@ -534,31 +567,58 @@ static void show_trace(struct task_struct *task, unsigned long *sp, } #define STACK_DUMP_ENTRY_SIZE 4 -#define STACK_DUMP_LINE_SIZE 32 +#define STACK_DUMP_LINE_SIZE 16 static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +struct stack_fragment { - size_t len, off = 0; - - if (!sp) - sp = stack_pointer(task); + size_t len; + size_t off; + u8 *sp; + const char *loglvl; +}; - len = min((-(size_t)sp) & (THREAD_SIZE - STACK_DUMP_ENTRY_SIZE), - kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE); +static int show_stack_fragment_cb(struct stackframe *frame, void *data) +{ + struct stack_fragment *sf = data; - printk("%sStack:\n", loglvl); - while (off < len) { + while (sf->off < sf->len) { u8 line[STACK_DUMP_LINE_SIZE]; - size_t line_len = len - off > STACK_DUMP_LINE_SIZE ? - STACK_DUMP_LINE_SIZE : len - off; + size_t line_len = sf->len - sf->off > STACK_DUMP_LINE_SIZE ? + STACK_DUMP_LINE_SIZE : sf->len - sf->off; + bool arrow = sf->off == 0; - __memcpy(line, (u8 *)sp + off, line_len); - print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE, + if (frame && frame->sp == (unsigned long)(sf->sp + sf->off)) + arrow = true; + + __memcpy(line, sf->sp + sf->off, line_len); + print_hex_dump(sf->loglvl, arrow ? "> " : " ", DUMP_PREFIX_NONE, STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE, line, line_len, false); - off += STACK_DUMP_LINE_SIZE; + sf->off += STACK_DUMP_LINE_SIZE; + if (arrow) + return 0; } + return 1; +} + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + struct stack_fragment sf; + + if (!sp) + sp = stack_pointer(task); + + sf.len = min((-(size_t)sp) & (THREAD_SIZE - STACK_DUMP_ENTRY_SIZE), + kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE); + sf.off = 0; + sf.sp = (u8 *)sp; + sf.loglvl = loglvl; + + printk("%sStack:\n", loglvl); + walk_stackframe(sp, show_stack_fragment_cb, &sf); + while (sf.off < sf.len) + show_stack_fragment_cb(NULL, &sf); show_trace(task, sp, loglvl); } diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 2a31b1ab0c9f..62d81e76e18e 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -13,67 +13,10 @@ */ #include <linux/module.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <asm/irq.h> -#include <linux/in6.h> - -#include <linux/uaccess.h> -#include <asm/cacheflush.h> -#include <asm/checksum.h> -#include <asm/dma.h> -#include <asm/io.h> -#include <asm/page.h> -#include <asm/ftrace.h> -#ifdef CONFIG_BLK_DEV_FD -#include <asm/floppy.h> -#endif -#ifdef CONFIG_NET -#include <net/checksum.h> -#endif /* CONFIG_NET */ - - -/* - * String functions - */ -EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(__memset); -EXPORT_SYMBOL(__memcpy); -EXPORT_SYMBOL(__memmove); -#ifdef CONFIG_ARCH_HAS_STRNCPY_FROM_USER -EXPORT_SYMBOL(__strncpy_user); -#endif -EXPORT_SYMBOL(clear_page); -EXPORT_SYMBOL(copy_page); +#include <asm/pgtable.h> EXPORT_SYMBOL(empty_zero_page); -/* - * gcc internal math functions - */ -extern long long __ashrdi3(long long, int); -extern long long __ashldi3(long long, int); -extern long long __lshrdi3(long long, int); -extern int __divsi3(int, int); -extern int __modsi3(int, int); -extern int __mulsi3(int, int); -extern unsigned int __udivsi3(unsigned int, unsigned int); -extern unsigned int __umodsi3(unsigned int, unsigned int); -extern unsigned long long __umulsidi3(unsigned int, unsigned int); - -EXPORT_SYMBOL(__ashldi3); -EXPORT_SYMBOL(__ashrdi3); -EXPORT_SYMBOL(__lshrdi3); -EXPORT_SYMBOL(__divsi3); -EXPORT_SYMBOL(__modsi3); -EXPORT_SYMBOL(__mulsi3); -EXPORT_SYMBOL(__udivsi3); -EXPORT_SYMBOL(__umodsi3); -EXPORT_SYMBOL(__umulsidi3); - unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v) { BUG(); @@ -85,35 +28,3 @@ unsigned int __sync_fetch_and_or_4(volatile void *p, unsigned int v) BUG(); } EXPORT_SYMBOL(__sync_fetch_and_or_4); - -/* - * Networking support - */ -EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(csum_partial_copy_generic); - -/* - * Architecture-specific symbols - */ -EXPORT_SYMBOL(__xtensa_copy_user); -EXPORT_SYMBOL(__invalidate_icache_range); - -/* - * Kernel hacking ... - */ - -#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) -// FIXME EXPORT_SYMBOL(screen_info); -#endif - -extern long common_exception_return; -EXPORT_SYMBOL(common_exception_return); - -#ifdef CONFIG_FUNCTION_TRACER -EXPORT_SYMBOL(_mcount); -#endif - -EXPORT_SYMBOL(__invalidate_dcache_range); -#if XCHAL_DCACHE_IS_WRITEBACK -EXPORT_SYMBOL(__flush_dcache_range); -#endif diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile index 7ecef0519a27..6e5b2232668c 100644 --- a/arch/xtensa/lib/Makefile +++ b/arch/xtensa/lib/Makefile @@ -4,9 +4,10 @@ # lib-y += memcopy.o memset.o checksum.o \ - ashldi3.o ashrdi3.o lshrdi3.o \ + ashldi3.o ashrdi3.o bswapdi2.o bswapsi2.o lshrdi3.o \ divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o umulsidi3.o \ - usercopy.o strncpy_user.o strnlen_user.o + usercopy.o strnlen_user.o +lib-$(CONFIG_ARCH_HAS_STRNCPY_FROM_USER) += strncpy_user.o lib-$(CONFIG_PCI) += pci-auto.o lib-$(CONFIG_KCSAN) += kcsan-stubs.o KCSAN_SANITIZE_kcsan-stubs.o := n diff --git a/arch/xtensa/lib/ashldi3.S b/arch/xtensa/lib/ashldi3.S index 67fb0da9e432..cd6b731215d3 100644 --- a/arch/xtensa/lib/ashldi3.S +++ b/arch/xtensa/lib/ashldi3.S @@ -26,3 +26,4 @@ ENTRY(__ashldi3) abi_ret_default ENDPROC(__ashldi3) +EXPORT_SYMBOL(__ashldi3) diff --git a/arch/xtensa/lib/ashrdi3.S b/arch/xtensa/lib/ashrdi3.S index cbf052c512cc..07bc6e758020 100644 --- a/arch/xtensa/lib/ashrdi3.S +++ b/arch/xtensa/lib/ashrdi3.S @@ -26,3 +26,4 @@ ENTRY(__ashrdi3) abi_ret_default ENDPROC(__ashrdi3) +EXPORT_SYMBOL(__ashrdi3) diff --git a/arch/xtensa/lib/bswapdi2.S b/arch/xtensa/lib/bswapdi2.S new file mode 100644 index 000000000000..5d94a9352887 --- /dev/null +++ b/arch/xtensa/lib/bswapdi2.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include <linux/linkage.h> +#include <asm/asmmacro.h> +#include <asm/core.h> + +ENTRY(__bswapdi2) + + abi_entry_default + ssai 8 + srli a4, a2, 16 + src a4, a4, a2 + src a4, a4, a4 + src a4, a2, a4 + srli a2, a3, 16 + src a2, a2, a3 + src a2, a2, a2 + src a2, a3, a2 + mov a3, a4 + abi_ret_default + +ENDPROC(__bswapdi2) +EXPORT_SYMBOL(__bswapdi2) diff --git a/arch/xtensa/lib/bswapsi2.S b/arch/xtensa/lib/bswapsi2.S new file mode 100644 index 000000000000..fbfb8613d410 --- /dev/null +++ b/arch/xtensa/lib/bswapsi2.S @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */ +#include <linux/linkage.h> +#include <asm/asmmacro.h> +#include <asm/core.h> + +ENTRY(__bswapsi2) + + abi_entry_default + ssai 8 + srli a3, a2, 16 + src a3, a3, a2 + src a3, a3, a3 + src a2, a2, a3 + abi_ret_default + +ENDPROC(__bswapsi2) +EXPORT_SYMBOL(__bswapsi2) diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S index cf1bed1a5bd6..ffee6f94c8f8 100644 --- a/arch/xtensa/lib/checksum.S +++ b/arch/xtensa/lib/checksum.S @@ -169,6 +169,7 @@ ENTRY(csum_partial) j 5b /* branch to handle the remaining byte */ ENDPROC(csum_partial) +EXPORT_SYMBOL(csum_partial) /* * Copy from ds while checksumming, otherwise like csum_partial @@ -346,6 +347,7 @@ EX(10f) s8i a8, a3, 1 j 4b /* process the possible trailing odd byte */ ENDPROC(csum_partial_copy_generic) +EXPORT_SYMBOL(csum_partial_copy_generic) # Exception handler: diff --git a/arch/xtensa/lib/divsi3.S b/arch/xtensa/lib/divsi3.S index b044b4744a8b..edb3c4ad971b 100644 --- a/arch/xtensa/lib/divsi3.S +++ b/arch/xtensa/lib/divsi3.S @@ -72,3 +72,4 @@ ENTRY(__divsi3) abi_ret_default ENDPROC(__divsi3) +EXPORT_SYMBOL(__divsi3) diff --git a/arch/xtensa/lib/lshrdi3.S b/arch/xtensa/lib/lshrdi3.S index 129ef8d1725b..e432e1a40702 100644 --- a/arch/xtensa/lib/lshrdi3.S +++ b/arch/xtensa/lib/lshrdi3.S @@ -26,3 +26,4 @@ ENTRY(__lshrdi3) abi_ret_default ENDPROC(__lshrdi3) +EXPORT_SYMBOL(__lshrdi3) diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S index b20d206bcb71..f60760396cee 100644 --- a/arch/xtensa/lib/memcopy.S +++ b/arch/xtensa/lib/memcopy.S @@ -273,21 +273,8 @@ WEAK(memcpy) abi_ret_default ENDPROC(__memcpy) - -/* - * void bcopy(const void *src, void *dest, size_t n); - */ - -ENTRY(bcopy) - - abi_entry_default - # a2=src, a3=dst, a4=len - mov a5, a3 - mov a3, a2 - mov a2, a5 - j .Lmovecommon # go to common code for memmove+bcopy - -ENDPROC(bcopy) +EXPORT_SYMBOL(__memcpy) +EXPORT_SYMBOL(memcpy) /* * void *memmove(void *dst, const void *src, size_t len); @@ -551,3 +538,5 @@ WEAK(memmove) abi_ret_default ENDPROC(__memmove) +EXPORT_SYMBOL(__memmove) +EXPORT_SYMBOL(memmove) diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S index 59b1524fd601..262c3f39f945 100644 --- a/arch/xtensa/lib/memset.S +++ b/arch/xtensa/lib/memset.S @@ -142,6 +142,8 @@ EX(10f) s8i a3, a5, 0 abi_ret_default ENDPROC(__memset) +EXPORT_SYMBOL(__memset) +EXPORT_SYMBOL(memset) .section .fixup, "ax" .align 4 diff --git a/arch/xtensa/lib/modsi3.S b/arch/xtensa/lib/modsi3.S index d00e77181e20..c5f4295c6868 100644 --- a/arch/xtensa/lib/modsi3.S +++ b/arch/xtensa/lib/modsi3.S @@ -60,6 +60,7 @@ ENTRY(__modsi3) abi_ret_default ENDPROC(__modsi3) +EXPORT_SYMBOL(__modsi3) #if !XCHAL_HAVE_NSA .section .rodata diff --git a/arch/xtensa/lib/mulsi3.S b/arch/xtensa/lib/mulsi3.S index 91a9d7c62f96..c6b4fd46bfa9 100644 --- a/arch/xtensa/lib/mulsi3.S +++ b/arch/xtensa/lib/mulsi3.S @@ -131,3 +131,4 @@ ENTRY(__mulsi3) abi_ret_default ENDPROC(__mulsi3) +EXPORT_SYMBOL(__mulsi3) diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S index 0731912227d3..9841d1694cdf 100644 --- a/arch/xtensa/lib/strncpy_user.S +++ b/arch/xtensa/lib/strncpy_user.S @@ -201,6 +201,7 @@ EX(10f) s8i a9, a11, 0 abi_ret_default ENDPROC(__strncpy_user) +EXPORT_SYMBOL(__strncpy_user) .section .fixup, "ax" .align 4 diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S index 3d391dca3efb..cdcf57474164 100644 --- a/arch/xtensa/lib/strnlen_user.S +++ b/arch/xtensa/lib/strnlen_user.S @@ -133,6 +133,7 @@ EX(10f) l32i a9, a4, 0 # get word with first two bytes of string abi_ret_default ENDPROC(__strnlen_user) +EXPORT_SYMBOL(__strnlen_user) .section .fixup, "ax" .align 4 diff --git a/arch/xtensa/lib/udivsi3.S b/arch/xtensa/lib/udivsi3.S index d2477e0786cf..59ea2dfc3f72 100644 --- a/arch/xtensa/lib/udivsi3.S +++ b/arch/xtensa/lib/udivsi3.S @@ -66,3 +66,4 @@ ENTRY(__udivsi3) abi_ret_default ENDPROC(__udivsi3) +EXPORT_SYMBOL(__udivsi3) diff --git a/arch/xtensa/lib/umodsi3.S b/arch/xtensa/lib/umodsi3.S index 5f031bfa0354..d39a7e56a971 100644 --- a/arch/xtensa/lib/umodsi3.S +++ b/arch/xtensa/lib/umodsi3.S @@ -55,3 +55,4 @@ ENTRY(__umodsi3) abi_ret_default ENDPROC(__umodsi3) +EXPORT_SYMBOL(__umodsi3) diff --git a/arch/xtensa/lib/umulsidi3.S b/arch/xtensa/lib/umulsidi3.S index 136081647942..8c7a94a0c5d0 100644 --- a/arch/xtensa/lib/umulsidi3.S +++ b/arch/xtensa/lib/umulsidi3.S @@ -228,3 +228,4 @@ ENTRY(__umulsidi3) #endif /* XCHAL_NO_MUL */ ENDPROC(__umulsidi3) +EXPORT_SYMBOL(__umulsidi3) diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index 16128c094c62..2c665c0b408e 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -283,6 +283,7 @@ EX(10f) s8i a6, a5, 0 abi_ret(STACK_SIZE) ENDPROC(__xtensa_copy_user) +EXPORT_SYMBOL(__xtensa_copy_user) .section .fixup, "ax" .align 4 diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c index 1fef24db2ff6..f00d122aa806 100644 --- a/arch/xtensa/mm/kasan_init.c +++ b/arch/xtensa/mm/kasan_init.c @@ -14,7 +14,6 @@ #include <linux/kernel.h> #include <asm/initialize_mmu.h> #include <asm/tlbflush.h> -#include <asm/traps.h> void __init kasan_early_init(void) { @@ -31,7 +30,6 @@ void __init kasan_early_init(void) BUG_ON(!pmd_none(*pmd)); set_pmd(pmd, __pmd((unsigned long)kasan_early_shadow_pte)); } - early_trap_init(); } static void __init populate(void *start, void *end) diff --git a/arch/xtensa/mm/misc.S b/arch/xtensa/mm/misc.S index 0527bf6e3211..ec36f73c4765 100644 --- a/arch/xtensa/mm/misc.S +++ b/arch/xtensa/mm/misc.S @@ -47,6 +47,7 @@ ENTRY(clear_page) abi_ret_default ENDPROC(clear_page) +EXPORT_SYMBOL(clear_page) /* * copy_page and copy_user_page are the same for non-cache-aliased configs. @@ -89,6 +90,7 @@ ENTRY(copy_page) abi_ret_default ENDPROC(copy_page) +EXPORT_SYMBOL(copy_page) #ifdef CONFIG_MMU /* @@ -367,6 +369,7 @@ ENTRY(__invalidate_icache_range) abi_ret_default ENDPROC(__invalidate_icache_range) +EXPORT_SYMBOL(__invalidate_icache_range) /* * void __flush_invalidate_dcache_range(ulong start, ulong size) @@ -397,6 +400,7 @@ ENTRY(__flush_dcache_range) abi_ret_default ENDPROC(__flush_dcache_range) +EXPORT_SYMBOL(__flush_dcache_range) /* * void _invalidate_dcache_range(ulong start, ulong size) @@ -411,6 +415,7 @@ ENTRY(__invalidate_dcache_range) abi_ret_default ENDPROC(__invalidate_dcache_range) +EXPORT_SYMBOL(__invalidate_dcache_range) /* * void _invalidate_icache_all(void) diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index d3433e1bb94e..0f1fe132691e 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -16,6 +16,7 @@ #include <linux/notifier.h> #include <linux/panic_notifier.h> #include <linux/printk.h> +#include <linux/reboot.h> #include <linux/string.h> #include <asm/platform.h> @@ -24,26 +25,27 @@ #include <platform/simcall.h> -void platform_halt(void) -{ - pr_info(" ** Called platform_halt() **\n"); - simc_exit(0); -} - -void platform_power_off(void) +static int iss_power_off(struct sys_off_data *unused) { pr_info(" ** Called platform_power_off() **\n"); simc_exit(0); + return NOTIFY_DONE; } -void platform_restart(void) +static int iss_restart(struct notifier_block *this, + unsigned long event, void *ptr) { /* Flush and reset the mmu, simulate a processor reset, and * jump to the reset vector. */ cpu_reset(); - /* control never gets here */ + + return NOTIFY_DONE; } +static struct notifier_block iss_restart_block = { + .notifier_call = iss_restart, +}; + static int iss_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -82,4 +84,8 @@ void __init platform_setup(char **p_cmdline) } atomic_notifier_chain_register(&panic_notifier_list, &iss_panic_block); + register_restart_handler(&iss_restart_block); + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_PLATFORM, + iss_power_off, NULL); } diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index f50caaa1c249..178cf96ca10a 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -120,9 +120,9 @@ static void simdisk_submit_bio(struct bio *bio) bio_endio(bio); } -static int simdisk_open(struct block_device *bdev, fmode_t mode) +static int simdisk_open(struct gendisk *disk, blk_mode_t mode) { - struct simdisk *dev = bdev->bd_disk->private_data; + struct simdisk *dev = disk->private_data; spin_lock(&dev->lock); ++dev->users; @@ -130,7 +130,7 @@ static int simdisk_open(struct block_device *bdev, fmode_t mode) return 0; } -static void simdisk_release(struct gendisk *disk, fmode_t mode) +static void simdisk_release(struct gendisk *disk) { struct simdisk *dev = disk->private_data; spin_lock(&dev->lock); diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c index 0dc22c371614..258e01a51fd8 100644 --- a/arch/xtensa/platforms/xt2000/setup.c +++ b/arch/xtensa/platforms/xt2000/setup.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/serial.h> #include <linux/serial_8250.h> +#include <linux/timer.h> #include <asm/processor.h> #include <asm/platform.h> @@ -41,51 +42,46 @@ static void led_print (int f, char *s) break; } -void platform_halt(void) -{ - led_print (0, " HALT "); - local_irq_disable(); - while (1); -} - -void platform_power_off(void) +static int xt2000_power_off(struct sys_off_data *unused) { led_print (0, "POWEROFF"); local_irq_disable(); while (1); + return NOTIFY_DONE; } -void platform_restart(void) +static int xt2000_restart(struct notifier_block *this, + unsigned long event, void *ptr) { /* Flush and reset the mmu, simulate a processor reset, and * jump to the reset vector. */ cpu_reset(); - /* control never gets here */ + + return NOTIFY_DONE; } +static struct notifier_block xt2000_restart_block = { + .notifier_call = xt2000_restart, +}; + void __init platform_setup(char** cmdline) { led_print (0, "LINUX "); } -/* early initialization */ +/* Heartbeat. Let the LED blink. */ -void __init platform_init(bp_tag_t *first) -{ -} +static void xt2000_heartbeat(struct timer_list *unused); -/* Heartbeat. Let the LED blink. */ +static DEFINE_TIMER(heartbeat_timer, xt2000_heartbeat); -void platform_heartbeat(void) +static void xt2000_heartbeat(struct timer_list *unused) { - static int i, t; + static int i; - if (--t < 0) - { - t = 59; - led_print(7, i ? ".": " "); - i ^= 1; - } + led_print(7, i ? "." : " "); + i ^= 1; + mod_timer(&heartbeat_timer, jiffies + HZ / 2); } //#define RS_TABLE_SIZE 2 @@ -143,7 +139,11 @@ static int __init xt2000_setup_devinit(void) { platform_device_register(&xt2000_serial8250_device); platform_device_register(&xt2000_sonic_device); - + mod_timer(&heartbeat_timer, jiffies + HZ / 2); + register_restart_handler(&xt2000_restart_block); + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + xt2000_power_off, NULL); return 0; } diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index c79c1d09ea86..a2432f081710 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -33,23 +33,17 @@ #include <platform/lcd.h> #include <platform/hardware.h> -void platform_halt(void) -{ - lcd_disp_at_pos(" HALT ", 0); - local_irq_disable(); - while (1) - cpu_relax(); -} - -void platform_power_off(void) +static int xtfpga_power_off(struct sys_off_data *unused) { lcd_disp_at_pos("POWEROFF", 0); local_irq_disable(); while (1) cpu_relax(); + return NOTIFY_DONE; } -void platform_restart(void) +static int xtfpga_restart(struct notifier_block *this, + unsigned long event, void *ptr) { /* Try software reset first. */ WRITE_ONCE(*(u32 *)XTFPGA_SWRST_VADDR, 0xdead); @@ -58,9 +52,14 @@ void platform_restart(void) * simulate a processor reset, and jump to the reset vector. */ cpu_reset(); - /* control never gets here */ + + return NOTIFY_DONE; } +static struct notifier_block xtfpga_restart_block = { + .notifier_call = xtfpga_restart, +}; + #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT void __init platform_calibrate_ccount(void) @@ -70,6 +69,14 @@ void __init platform_calibrate_ccount(void) #endif +static void __init xtfpga_register_handlers(void) +{ + register_restart_handler(&xtfpga_restart_block); + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + xtfpga_power_off, NULL); +} + #ifdef CONFIG_USE_OF static void __init xtfpga_clk_setup(struct device_node *np) @@ -134,6 +141,9 @@ static int __init machine_setup(void) if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); of_node_put(eth); + + xtfpga_register_handlers(); + return 0; } arch_initcall(machine_setup); @@ -281,6 +291,8 @@ static int __init xtavnet_init(void) pr_info("XTFPGA: Ethernet MAC %pM\n", ethoc_pdata.hwaddr); ethoc_pdata.eth_clkfreq = *(long *)XTFPGA_CLKFRQ_VADDR; + xtfpga_register_handlers(); + return 0; } |