diff options
141 files changed, 2290 insertions, 1507 deletions
@@ -803,6 +803,7 @@ Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko.ursulin@onelan.co.uk> Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko@ursulin.net> Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws> Tzung-Bi Shih <tzungbi@kernel.org> <tzungbi@google.com> +Umang Jain <uajain@igalia.com> <umang.jain@ideasonboard.com> Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de> Uwe Kleine-König <u.kleine-koenig@baylibre.com> <ukleinek@baylibre.com> Uwe Kleine-König <u.kleine-koenig@pengutronix.de> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a51ab4656854..6c42061ca20e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -608,6 +608,24 @@ ccw_timeout_log [S390] See Documentation/arch/s390/common_io.rst for details. + cfi= [X86-64] Set Control Flow Integrity checking features + when CONFIG_FINEIBT is enabled. + Format: feature[,feature...] + Default: auto + + auto: Use FineIBT if IBT available, otherwise kCFI. + Under FineIBT, enable "paranoid" mode when + FRED is not available. + off: Turn off CFI checking. + kcfi: Use kCFI (disable FineIBT). + fineibt: Use FineIBT (even if IBT not available). + norand: Do not re-randomize CFI hashes. + paranoid: Add caller hash checking under FineIBT. + bhi: Enable register poisoning to stop speculation + across FineIBT. (Disabled by default.) + warn: Do not enforce CFI checking: warn only. + debug: Report CFI initialization details. 
+ cgroup_disable= [KNL] Disable a particular controller or optional feature Format: {name of the controller(s) or feature(s) to disable} The effects of cgroup_disable=foo are: diff --git a/Documentation/devicetree/bindings/rtc/apm,xgene-rtc.yaml b/Documentation/devicetree/bindings/rtc/apm,xgene-rtc.yaml new file mode 100644 index 000000000000..b8f46536fd5a --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/apm,xgene-rtc.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/apm,xgene-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: APM X-Gene Real Time Clock + +maintainers: + - Khuong Dinh <khuong@os.amperecomputing.com> + +properties: + compatible: + const: apm,xgene-rtc + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + '#clock-cells': + const: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - '#clock-cells' + - clocks + +additionalProperties: false + +examples: + - | + rtc@10510000 { + compatible = "apm,xgene-rtc"; + reg = <0x10510000 0x400>; + interrupts = <0x0 0x46 0x4>; + #clock-cells = <1>; + clocks = <&rtcclk 0>; + }; diff --git a/Documentation/devicetree/bindings/rtc/isil,isl12057.txt b/Documentation/devicetree/bindings/rtc/isil,isl12057.txt deleted file mode 100644 index ff7c43555199..000000000000 --- a/Documentation/devicetree/bindings/rtc/isil,isl12057.txt +++ /dev/null @@ -1,74 +0,0 @@ -Intersil ISL12057 I2C RTC/Alarm chip - -ISL12057 is a trivial I2C device (it has simple device tree bindings, -consisting of a compatible field, an address and possibly an interrupt -line). 
- -Nonetheless, it also supports an option boolean property -("wakeup-source") to handle the specific use-case found -on at least three in-tree users of the chip (NETGEAR ReadyNAS 102, 104 -and 2120 ARM-based NAS); On those devices, the IRQ#2 pin of the chip -(associated with the alarm supported by the driver) is not connected -to the SoC but to a PMIC. It allows the device to be powered up when -RTC alarm rings. In order to mark the device has a wakeup source and -get access to the 'wakealarm' sysfs entry, this specific property can -be set when the IRQ#2 pin of the chip is not connected to the SoC but -can wake up the device. - -Required properties supported by the device: - - - "compatible": must be "isil,isl12057" - - "reg": I2C bus address of the device - -Optional properties: - - - "wakeup-source": mark the chip as a wakeup source, independently of - the availability of an IRQ line connected to the SoC. - - -Example isl12057 node without IRQ#2 pin connected (no alarm support): - - isl12057: isl12057@68 { - compatible = "isil,isl12057"; - reg = <0x68>; - }; - - -Example isl12057 node with IRQ#2 pin connected to main SoC via MPP6 (note -that the pinctrl-related properties below are given for completeness and -may not be required or may be different depending on your system or -SoC, and the main function of the MPP used as IRQ line, i.e. -"interrupt-parent" and "interrupts" are usually sufficient): - - pinctrl { - ... - - rtc_alarm_pin: rtc_alarm_pin { - marvell,pins = "mpp6"; - marvell,function = "gpio"; - }; - - ... - - }; - - ... 
- - isl12057: isl12057@68 { - compatible = "isil,isl12057"; - reg = <0x68>; - pinctrl-0 = <&rtc_alarm_pin>; - pinctrl-names = "default"; - interrupt-parent = <&gpio0>; - interrupts = <6 IRQ_TYPE_EDGE_FALLING>; - }; - - -Example isl12057 node without IRQ#2 pin connected to the SoC but to a -PMIC, allowing the device to be started based on configured alarm: - - isl12057: isl12057@68 { - compatible = "isil,isl12057"; - reg = <0x68>; - wakeup-source; - }; diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml index 1e6277e524c2..f7013cd8fc20 100644 --- a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml +++ b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml @@ -65,16 +65,6 @@ allOf: - if: properties: compatible: - contains: - enum: - - nxp,pcf85063 - then: - properties: - quartz-load-femtofarads: - const: 7000 - - if: - properties: - compatible: not: contains: enum: diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.yaml b/Documentation/devicetree/bindings/rtc/s3c-rtc.yaml index bf4e11d6dffb..338874e7ed7f 100644 --- a/Documentation/devicetree/bindings/rtc/s3c-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/s3c-rtc.yaml @@ -13,9 +13,6 @@ properties: compatible: oneOf: - enum: - - samsung,s3c2410-rtc - - samsung,s3c2416-rtc - - samsung,s3c2443-rtc - samsung,s3c6410-rtc - items: - enum: @@ -29,19 +26,12 @@ properties: maxItems: 1 clocks: - description: - Must contain a list of phandle and clock specifier for the rtc - clock and in the case of a s3c6410 compatible controller, also - a source clock. - minItems: 1 maxItems: 2 clock-names: - description: - Must contain "rtc" and for a s3c6410 compatible controller - also "rtc_src". 
- minItems: 1 - maxItems: 2 + items: + - const: rtc + - const: rtc_src interrupts: description: @@ -54,30 +44,6 @@ properties: allOf: - $ref: rtc.yaml# - - if: - properties: - compatible: - contains: - enum: - - samsung,s3c6410-rtc - - samsung,exynos3250-rtc - then: - properties: - clocks: - minItems: 2 - maxItems: 2 - clock-names: - items: - - const: rtc - - const: rtc_src - else: - properties: - clocks: - minItems: 1 - maxItems: 1 - clock-names: - items: - - const: rtc unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml index 5e0c7cd25cc6..b47822370d6f 100644 --- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml @@ -38,6 +38,8 @@ properties: - dallas,ds1672 # Extremely Accurate I²C RTC with Integrated Crystal and SRAM - dallas,ds3232 + # Dallas m41t00 Real-time Clock + - dallas,m41t00 # SD2405AL Real-Time Clock - dfrobot,sd2405al # EM Microelectronic EM3027 RTC @@ -83,8 +85,8 @@ properties: - via,vt8500-rtc # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC - whwave,sd3078 - # Xircom X1205 I2C RTC - - xircom,x1205 + # Xicor/Intersil X1205 I2C RTC + - xicor,x1205 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/rtc/xgene-rtc.txt b/Documentation/devicetree/bindings/rtc/xgene-rtc.txt deleted file mode 100644 index fd195c358446..000000000000 --- a/Documentation/devicetree/bindings/rtc/xgene-rtc.txt +++ /dev/null @@ -1,28 +0,0 @@ -* APM X-Gene Real Time Clock - -RTC controller for the APM X-Gene Real Time Clock - -Required properties: -- compatible : Should be "apm,xgene-rtc" -- reg: physical base address of the controller and length of memory mapped - region. -- interrupts: IRQ line for the RTC. -- #clock-cells: Should be 1. -- clocks: Reference to the clock entry. 
- -Example: - -rtcclk: rtcclk { - compatible = "fixed-clock"; - #clock-cells = <1>; - clock-frequency = <100000000>; - clock-output-names = "rtcclk"; -}; - -rtc: rtc@10510000 { - compatible = "apm,xgene-rtc"; - reg = <0x0 0x10510000 0x0 0x400>; - interrupts = <0x0 0x46 0x4>; - #clock-cells = <1>; - clocks = <&rtcclk 0>; -}; diff --git a/MAINTAINERS b/MAINTAINERS index e4886604631d..46126ce2f968 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23911,7 +23911,7 @@ F: drivers/media/i2c/imx274.c SONY IMX283 SENSOR DRIVER M: Kieran Bingham <kieran.bingham@ideasonboard.com> -M: Umang Jain <umang.jain@ideasonboard.com> +R: Umang Jain <uajain@igalia.com> L: linux-media@vger.kernel.org S: Maintained T: git git://linuxtv.org/media.git diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index feecf1a6ddb4..d74d4c52ccd0 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -215,6 +215,28 @@ SECTIONS ELF_DETAILS /* + * Make sure that the .got.plt is either completely empty or it + * contains only the three reserved double words. + */ + .got.plt : { + *(.got.plt) + } + ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!") + + /* + * Sections that should stay zero sized, which is safer to + * explicitly check instead of blindly discarding. + */ + .plt : { + *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) + } + ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") + .rela.dyn : { + *(.rela.*) *(.rela_*) + } + ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!") + + /* * uncompressed image info used by the decompressor * it should match struct vmlinux_info */ @@ -244,28 +266,6 @@ SECTIONS #endif } :NONE - /* - * Make sure that the .got.plt is either completely empty or it - * contains only the three reserved double words. 
- */ - .got.plt : { - *(.got.plt) - } - ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!") - - /* - * Sections that should stay zero sized, which is safer to - * explicitly check instead of blindly discarding. - */ - .plt : { - *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) - } - ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") - .rela.dyn : { - *(.rela.*) *(.rela_*) - } - ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!") - /* Sections to be discarded */ DISCARDS /DISCARD/ : { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d034a987c6e..fa3b616af03a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -412,10 +412,6 @@ config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI -config X86_64_SMP - def_bool y - depends on X86_64 && SMP - config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index c827f694fb72..b1c59fb0a4c9 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -1,26 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-config AS_AVX512 - def_bool $(as-instr,vpmovm2b %k1$(comma)%zmm5) - help - Supported by binutils >= 2.25 and LLVM integrated assembler - -config AS_GFNI - def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2) - help - Supported by binutils >= 2.30 and LLVM integrated assembler - -config AS_VAES - def_bool $(as-instr,vaesenc %ymm0$(comma)%ymm1$(comma)%ymm2) - help - Supported by binutils >= 2.30 and LLVM integrated assembler - -config AS_VPCLMULQDQ - def_bool $(as-instr,vpclmulqdq \$0x10$(comma)%ymm0$(comma)%ymm1$(comma)%ymm2) - help - Supported by binutils >= 2.30 and LLVM integrated assembler - config AS_WRUSS def_bool $(as-instr64,wrussq %rax$(comma)(%rbx)) help diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 8518ae214c9b..79e15971529d 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -27,7 +27,7 @@ static inline bool variable_test_bit(int nr, const void *addr) bool v; const u32 *p = addr; - asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr)); + asm("btl %2,%1" : "=@ccc" (v) : "m" (*p), "Ir" (nr)); return v; } diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 60580836daf7..a3c58ebe3662 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -155,15 +155,15 @@ static inline void wrgs32(u32 v, addr_t addr) static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("fs repe cmpsb" CC_SET(nz) - : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("fs repe cmpsb" + : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("gs repe cmpsb" CC_SET(nz) - : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("gs repe cmpsb" + : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index f35369bb14c5..b25c6a9303b7 100644 --- 
a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -32,8 +32,8 @@ int memcmp(const void *s1, const void *s2, size_t len) { bool diff; - asm("repe cmpsb" CC_SET(nz) - : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm("repe cmpsb" + : "=@ccnz" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index d9c6fc78cf33..48d3076b6053 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -306,7 +306,7 @@ config CRYPTO_ARIA_AESNI_AVX2_X86_64 config CRYPTO_ARIA_GFNI_AVX512_X86_64 tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)" - depends on 64BIT && AS_GFNI + depends on 64BIT select CRYPTO_SKCIPHER select CRYPTO_ALGAPI select CRYPTO_ARIA diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index dfba7e5e88ea..2d30d5d36145 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -46,10 +46,8 @@ obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \ aes-gcm-aesni-x86_64.o \ - aes-xts-avx-x86_64.o -ifeq ($(CONFIG_AS_VAES)$(CONFIG_AS_VPCLMULQDQ),yy) -aesni-intel-$(CONFIG_64BIT) += aes-gcm-avx10-x86_64.o -endif + aes-xts-avx-x86_64.o \ + aes-gcm-avx10-x86_64.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/aes-ctr-avx-x86_64.S b/arch/x86/crypto/aes-ctr-avx-x86_64.S index bbbfd80f5a50..2745918f68ee 100644 --- a/arch/x86/crypto/aes-ctr-avx-x86_64.S +++ b/arch/x86/crypto/aes-ctr-avx-x86_64.S @@ -552,7 +552,6 @@ SYM_TYPED_FUNC_START(aes_xctr_crypt_aesni_avx) _aes_ctr_crypt 1 SYM_FUNC_END(aes_xctr_crypt_aesni_avx) -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) .set VL, 32 .set USE_AVX512, 0 SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2) @@ -570,4 +569,3 @@ SYM_FUNC_END(aes_ctr64_crypt_vaes_avx512) 
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx512) _aes_ctr_crypt 1 SYM_FUNC_END(aes_xctr_crypt_vaes_avx512) -#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S index db79cdf81588..a30753a3e207 100644 --- a/arch/x86/crypto/aes-xts-avx-x86_64.S +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S @@ -886,7 +886,6 @@ SYM_TYPED_FUNC_START(aes_xts_decrypt_aesni_avx) _aes_xts_crypt 0 SYM_FUNC_END(aes_xts_decrypt_aesni_avx) -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) .set VL, 32 .set USE_AVX512, 0 SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2) @@ -904,4 +903,3 @@ SYM_FUNC_END(aes_xts_encrypt_vaes_avx512) SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx512) _aes_xts_crypt 0 SYM_FUNC_END(aes_xts_decrypt_vaes_avx512) -#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 061b1ced93c5..d953ac470aae 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -828,10 +828,8 @@ static struct skcipher_alg skcipher_algs_##suffix[] = {{ \ }} DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500); -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600); DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800); -#endif /* The common part of the x86_64 AES-GCM key struct */ struct aes_gcm_key { @@ -912,17 +910,8 @@ struct aes_gcm_key_avx10 { #define FLAG_RFC4106 BIT(0) #define FLAG_ENC BIT(1) #define FLAG_AVX BIT(2) -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) -# define FLAG_AVX10_256 BIT(3) -# define FLAG_AVX10_512 BIT(4) -#else - /* - * This should cause all calls to the AVX10 assembly functions to be - * optimized out, avoiding the need to ifdef each call individually. 
- */ -# define FLAG_AVX10_256 0 -# define FLAG_AVX10_512 0 -#endif +#define FLAG_AVX10_256 BIT(3) +#define FLAG_AVX10_512 BIT(4) static inline struct aes_gcm_key * aes_gcm_key_get(struct crypto_aead *tfm, int flags) @@ -1519,7 +1508,6 @@ DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX, "generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx", AES_GCM_KEY_AESNI_SIZE, 500); -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) /* aes_gcm_algs_vaes_avx10_256 */ DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256, "generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256", @@ -1529,7 +1517,6 @@ DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256, DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512, "generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512", AES_GCM_KEY_AVX10_SIZE, 800); -#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ static int __init register_avx_algs(void) { @@ -1551,7 +1538,6 @@ static int __init register_avx_algs(void) * Similarly, the assembler support was added at about the same time. * For simplicity, just always check for VAES and VPCLMULQDQ together. 
*/ -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_VAES) || !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) || @@ -1592,7 +1578,7 @@ static int __init register_avx_algs(void) ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512)); if (err) return err; -#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ + return 0; } @@ -1607,12 +1593,10 @@ static void unregister_avx_algs(void) { unregister_skciphers(skcipher_algs_aesni_avx); unregister_aeads(aes_gcm_algs_aesni_avx); -#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) unregister_skciphers(skcipher_algs_vaes_avx2); unregister_skciphers(skcipher_algs_vaes_avx512); unregister_aeads(aes_gcm_algs_vaes_avx10_256); unregister_aeads(aes_gcm_algs_vaes_avx10_512); -#endif } #else /* CONFIG_X86_64 */ static struct aead_alg aes_gcm_algs_aesni[0]; diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S index 9556dacd9841..932fb17308e7 100644 --- a/arch/x86/crypto/aria-aesni-avx-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -295,7 +295,6 @@ vpshufb t1, t0, t2; \ vpxor t2, x7, x7; -#ifdef CONFIG_AS_GFNI #define aria_sbox_8way_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ @@ -318,8 +317,6 @@ vgf2p8affineinvqb $0, t2, x3, x3; \ vgf2p8affineinvqb $0, t2, x7, x7 -#endif /* CONFIG_AS_GFNI */ - #define aria_sbox_8way(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ @@ -561,7 +558,6 @@ y4, y5, y6, y7, \ mem_tmp, 8); -#ifdef CONFIG_AS_GFNI #define aria_fe_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ y0, y1, y2, y3, \ @@ -719,8 +715,6 @@ y4, y5, y6, y7, \ mem_tmp, 8); -#endif /* CONFIG_AS_GFNI */ - /* NB: section is mergeable, all elements must be aligned 16-byte blocks */ .section .rodata.cst16, "aM", @progbits, 16 .align 16 @@ -772,7 +766,6 @@ .Ltf_hi__x2__and__fwd_aff: .octa 0x3F893781E95FE1576CDA64D2BA0CB204 -#ifdef CONFIG_AS_GFNI /* AES affine: */ #define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) 
.Ltf_aff_bitmatrix: @@ -871,7 +864,6 @@ BV8(0, 0, 0, 0, 0, 1, 0, 0), BV8(0, 0, 0, 0, 0, 0, 1, 0), BV8(0, 0, 0, 0, 0, 0, 0, 1)) -#endif /* CONFIG_AS_GFNI */ /* 4-bit mask */ .section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 @@ -1140,7 +1132,6 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way) -#ifdef CONFIG_AS_GFNI SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) /* input: * %r9: rk @@ -1359,4 +1350,3 @@ SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) FRAME_END RET; SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way) -#endif /* CONFIG_AS_GFNI */ diff --git a/arch/x86/crypto/aria-aesni-avx2-asm_64.S b/arch/x86/crypto/aria-aesni-avx2-asm_64.S index c60fa2980630..ed53d4f46bd7 100644 --- a/arch/x86/crypto/aria-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx2-asm_64.S @@ -302,7 +302,6 @@ vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \ vpxor t0, x7, x7; -#ifdef CONFIG_AS_GFNI #define aria_sbox_8way_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ @@ -325,7 +324,6 @@ vgf2p8affineinvqb $0, t2, x3, x3; \ vgf2p8affineinvqb $0, t2, x7, x7 -#endif /* CONFIG_AS_GFNI */ #define aria_sbox_8way(x0, x1, x2, x3, \ x4, x5, x6, x7, \ t0, t1, t2, t3, \ @@ -598,7 +596,7 @@ aria_load_state_8way(y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, 8); -#ifdef CONFIG_AS_GFNI + #define aria_fe_gfni(x0, x1, x2, x3, \ x4, x5, x6, x7, \ y0, y1, y2, y3, \ @@ -752,7 +750,6 @@ aria_load_state_8way(y0, y1, y2, y3, \ y4, y5, y6, y7, \ mem_tmp, 8); -#endif /* CONFIG_AS_GFNI */ .section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 .align 32 @@ -806,7 +803,6 @@ .Ltf_hi__x2__and__fwd_aff: .octa 0x3F893781E95FE1576CDA64D2BA0CB204 -#ifdef CONFIG_AS_GFNI .section .rodata.cst8, "aM", @progbits, 8 .align 8 /* AES affine: */ @@ -868,8 +864,6 @@ BV8(0, 0, 0, 0, 0, 0, 1, 0), BV8(0, 0, 0, 0, 0, 0, 0, 1)) -#endif /* CONFIG_AS_GFNI */ - /* 4-bit mask */ .section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 @@ -1219,7 
+1213,6 @@ SYM_TYPED_FUNC_START(aria_aesni_avx2_ctr_crypt_32way) RET; SYM_FUNC_END(aria_aesni_avx2_ctr_crypt_32way) -#ifdef CONFIG_AS_GFNI SYM_FUNC_START_LOCAL(__aria_aesni_avx2_gfni_crypt_32way) /* input: * %r9: rk @@ -1438,4 +1431,3 @@ SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_ctr_crypt_32way) FRAME_END RET; SYM_FUNC_END(aria_aesni_avx2_gfni_ctr_crypt_32way) -#endif /* CONFIG_AS_GFNI */ diff --git a/arch/x86/crypto/aria_aesni_avx2_glue.c b/arch/x86/crypto/aria_aesni_avx2_glue.c index 007b250f774c..1487a49bfbac 100644 --- a/arch/x86/crypto/aria_aesni_avx2_glue.c +++ b/arch/x86/crypto/aria_aesni_avx2_glue.c @@ -26,7 +26,6 @@ asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx2_ctr_crypt_32way); -#ifdef CONFIG_AS_GFNI asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_encrypt_32way); @@ -37,7 +36,6 @@ asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_ctr_crypt_32way); -#endif /* CONFIG_AS_GFNI */ static struct aria_avx_ops aria_ops; @@ -213,7 +211,7 @@ static int __init aria_avx2_init(void) return -ENODEV; } - if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) { + if (boot_cpu_has(X86_FEATURE_GFNI)) { aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c index 4c88ef4eba82..e4e3d78915a5 100644 --- a/arch/x86/crypto/aria_aesni_avx_glue.c +++ b/arch/x86/crypto/aria_aesni_avx_glue.c @@ -26,7 +26,6 @@ asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); 
EXPORT_SYMBOL_GPL(aria_aesni_avx_ctr_crypt_16way); -#ifdef CONFIG_AS_GFNI asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_encrypt_16way); @@ -37,7 +36,6 @@ asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, const u8 *src, u8 *keystream, u8 *iv); EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_ctr_crypt_16way); -#endif /* CONFIG_AS_GFNI */ static struct aria_avx_ops aria_ops; @@ -199,7 +197,7 @@ static int __init aria_avx_init(void) return -ENODEV; } - if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) { + if (boot_cpu_has(X86_FEATURE_GFNI)) { aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 94519688b007..77e2d920a640 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -99,7 +99,7 @@ For 32-bit we have the following conventions - kernel is built with .endif .endm -.macro CLEAR_REGS clear_bp=1 +.macro CLEAR_REGS clear_callee=1 /* * Sanitize registers of values that a speculation attack might * otherwise want to exploit. 
The lower registers are likely clobbered @@ -113,20 +113,19 @@ For 32-bit we have the following conventions - kernel is built with xorl %r9d, %r9d /* nospec r9 */ xorl %r10d, %r10d /* nospec r10 */ xorl %r11d, %r11d /* nospec r11 */ + .if \clear_callee xorl %ebx, %ebx /* nospec rbx */ - .if \clear_bp xorl %ebp, %ebp /* nospec rbp */ - .endif xorl %r12d, %r12d /* nospec r12 */ xorl %r13d, %r13d /* nospec r13 */ xorl %r14d, %r14d /* nospec r14 */ xorl %r15d, %r15d /* nospec r15 */ - + .endif .endm -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_bp=1 unwind_hint=1 +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_callee=1 unwind_hint=1 PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint - CLEAR_REGS clear_bp=\clear_bp + CLEAR_REGS clear_callee=\clear_callee .endm .macro POP_REGS pop_rdi=1 diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index 907bd233c6c1..fafbd3e68cb8 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -97,8 +97,7 @@ SYM_FUNC_START(asm_fred_entry_from_kvm) push %rdi /* fred_ss handed in by the caller */ push %rbp pushf - mov $__KERNEL_CS, %rax - push %rax + push $__KERNEL_CS /* * Unlike the IDT event delivery, FRED _always_ pushes an error code @@ -112,18 +111,37 @@ SYM_FUNC_START(asm_fred_entry_from_kvm) push %rax /* Return RIP */ push $0 /* Error code, 0 for IRQ/NMI */ - PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0 + PUSH_AND_CLEAR_REGS clear_callee=0 unwind_hint=0 + movq %rsp, %rdi /* %rdi -> pt_regs */ + /* + * At this point: {rdi, rsi, rdx, rcx, r8, r9}, {r10, r11}, {rax, rdx} + * are clobbered, which corresponds to: arguments, extra caller-saved + * and return. All registers a C function is allowed to clobber. + * + * Notably, the callee-saved registers: {rbx, r12, r13, r14, r15} + * are untouched, with the exception of rbp, which carries the stack + * frame and will be restored before exit. 
+ * + * Further calling another C function will not alter this state. + */ call __fred_entry_from_kvm /* Call the C entry point */ - POP_REGS - ERETS -1: + /* - * Objtool doesn't understand what ERETS does, this hint tells it that - * yes, we'll reach here and with what stack state. A save/restore pair - * isn't strictly needed, but it's the simplest form. + * When FRED, use ERETS to potentially clear NMIs, otherwise simply + * restore the stack pointer. + */ + ALTERNATIVE "nop; nop; mov %rbp, %rsp", \ + __stringify(add $C_PTREGS_SIZE, %rsp; ERETS), \ + X86_FEATURE_FRED + +1: /* + * Objtool doesn't understand ERETS, and the cfi register state is + * different from initial_func_cfi due to PUSH_REGS. Tell it the state + * is similar to where UNWIND_HINT_SAVE is. */ UNWIND_HINT_RESTORE + pop %rbp RET diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index afdbda2dd7b7..e890fd37e9c2 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,7 +17,6 @@ #include <asm/desc.h> #include <asm/e820/api.h> #include <asm/sev.h> -#include <asm/ibt.h> #include <asm/hypervisor.h> #include <hyperv/hvhdk.h> #include <asm/mshyperv.h> @@ -37,7 +36,45 @@ #include <linux/export.h> void *hv_hypercall_pg; + +#ifdef CONFIG_X86_64 +static u64 __hv_hyperfail(u64 control, u64 param1, u64 param2) +{ + return U64_MAX; +} + +DEFINE_STATIC_CALL(__hv_hypercall, __hv_hyperfail); + +u64 hv_std_hypercall(u64 control, u64 param1, u64 param2) +{ + u64 hv_status; + + register u64 __r8 asm("r8") = param2; + asm volatile ("call " STATIC_CALL_TRAMP_STR(__hv_hypercall) + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (param1), "+r" (__r8) + : : "cc", "memory", "r9", "r10", "r11"); + + return hv_status; +} + +typedef u64 (*hv_hypercall_f)(u64 control, u64 param1, u64 param2); + +static inline void hv_set_hypercall_pg(void *ptr) +{ + hv_hypercall_pg = ptr; + + if (!ptr) + ptr = &__hv_hyperfail; + static_call_update(__hv_hypercall, (hv_hypercall_f)ptr); +} 
+#else +static inline void hv_set_hypercall_pg(void *ptr) +{ + hv_hypercall_pg = ptr; +} EXPORT_SYMBOL_GPL(hv_hypercall_pg); +#endif union hv_ghcb * __percpu *hv_ghcb_pg; @@ -330,7 +367,7 @@ static int hv_suspend(void) * pointer is restored on resume. */ hv_hypercall_pg_saved = hv_hypercall_pg; - hv_hypercall_pg = NULL; + hv_set_hypercall_pg(NULL); /* Disable the hypercall page in the hypervisor */ rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -356,7 +393,7 @@ static void hv_resume(void) vmalloc_to_pfn(hv_hypercall_pg_saved); wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - hv_hypercall_pg = hv_hypercall_pg_saved; + hv_set_hypercall_pg(hv_hypercall_pg_saved); hv_hypercall_pg_saved = NULL; /* @@ -476,8 +513,8 @@ void __init hyperv_init(void) if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) goto skip_hypercall_pg_init; - hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, - VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, MODULES_VADDR, + MODULES_END, GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); if (hv_hypercall_pg == NULL) @@ -515,27 +552,9 @@ void __init hyperv_init(void) wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); } -skip_hypercall_pg_init: - /* - * Some versions of Hyper-V that provide IBT in guest VMs have a bug - * in that there's no ENDBR64 instruction at the entry to the - * hypercall page. Because hypercalls are invoked via an indirect call - * to the hypercall page, all hypercall attempts fail when IBT is - * enabled, and Linux panics. For such buggy versions, disable IBT. - * - * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall - * page, so if future Linux kernel versions enable IBT for 32-bit - * builds, additional hypercall page hackery will be required here - * to provide an ENDBR32. 
- */ -#ifdef CONFIG_X86_KERNEL_IBT - if (cpu_feature_enabled(X86_FEATURE_IBT) && - *(u32 *)hv_hypercall_pg != gen_endbr()) { - setup_clear_cpu_cap(X86_FEATURE_IBT); - pr_warn("Disabling IBT because of Hyper-V bug\n"); - } -#endif + hv_set_hypercall_pg(hv_hypercall_pg); +skip_hypercall_pg_init: /* * hyperv_init() is called before LAPIC is initialized: see * apic_intr_mode_init() -> x86_platform.apic_post_init() and diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index a4615b889f3e..651771534cae 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -385,9 +385,23 @@ int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) return ret; } +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) +{ + u64 hv_status; + + register u64 __r8 asm("r8") = param2; + asm volatile("vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (param1), "+r" (__r8) + : : "cc", "memory", "r9", "r10", "r11"); + + return hv_status; +} + #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; } #endif /* CONFIG_AMD_MEM_ENCRYPT */ #ifdef CONFIG_INTEL_TDX_GUEST @@ -437,6 +451,7 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) #else static inline void hv_tdx_msr_write(u64 msr, u64 value) {} static inline void hv_tdx_msr_read(u64 msr, u64 *value) {} +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; } #endif /* CONFIG_INTEL_TDX_GUEST */ #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 02bae8e0758b..4c305305871b 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -23,8 +23,7 @@ static inline bool __must_check rdrand_long(unsigned long *v) unsigned int retry = RDRAND_RETRY_LOOPS; do { asm volatile("rdrand 
%[out]" - CC_SET(c) - : CC_OUT(c) (ok), [out] "=r" (*v)); + : "=@ccc" (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); @@ -35,8 +34,7 @@ static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; asm volatile("rdseed %[out]" - CC_SET(c) - : CC_OUT(c) (ok), [out] "=r" (*v)); + : "=@ccc" (ok), [out] "=r" (*v)); return ok; } diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index f963848024a5..d5c8d3afe196 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -122,18 +122,6 @@ static __always_inline __pure void *rip_rel_ptr(void *p) } #endif -/* - * Macros to generate condition code outputs from inline assembly, - * The output operand must be type "bool". - */ -#ifdef __GCC_ASM_FLAG_OUTPUTS__ -# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" -# define CC_OUT(c) "=@cc" #c -#else -# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" -# define CC_OUT(c) [_cc_ ## c] "=qm" -#endif - #ifdef __KERNEL__ # include <asm/extable_fixup_types.h> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index a835f891164d..c2ce213f2b9b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -99,8 +99,7 @@ static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, { bool negative; asm_inline volatile(LOCK_PREFIX "xorb %2,%1" - CC_SET(s) - : CC_OUT(s) (negative), WBYTE_ADDR(addr) + : "=@ccs" (negative), WBYTE_ADDR(addr) : "iq" ((char)mask) : "memory"); return negative; } @@ -149,8 +148,7 @@ arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) bool oldbit; asm(__ASM_SIZE(bts) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) + : "=@ccc" (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -175,8 +173,7 @@ arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) bool oldbit; asm volatile(__ASM_SIZE(btr) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) + : "=@ccc" (oldbit) : ADDR, "Ir" (nr) : "memory"); return 
oldbit; } @@ -187,8 +184,7 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) bool oldbit; asm volatile(__ASM_SIZE(btc) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) + : "=@ccc" (oldbit) : ADDR, "Ir" (nr) : "memory"); return oldbit; @@ -211,8 +207,7 @@ static __always_inline bool constant_test_bit_acquire(long nr, const volatile un bool oldbit; asm volatile("testb %2,%1" - CC_SET(nz) - : CC_OUT(nz) (oldbit) + : "=@ccnz" (oldbit) : "m" (((unsigned char *)addr)[nr >> 3]), "i" (1 << (nr & 7)) :"memory"); @@ -225,8 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l bool oldbit; asm volatile(__ASM_SIZE(bt) " %2,%1" - CC_SET(c) - : CC_OUT(c) (oldbit) + : "=@ccc" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 20fcb8507ad1..880ca15073ed 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -5,14 +5,19 @@ #include <linux/stringify.h> #include <linux/instrumentation.h> #include <linux/objtool.h> +#include <asm/asm.h> /* * Despite that some emulators terminate on UD2, we use it for WARN(). */ -#define ASM_UD2 ".byte 0x0f, 0x0b" +#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b) #define INSN_UD2 0x0b0f #define LEN_UD2 2 +#define ASM_UDB _ASM_BYTES(0xd6) +#define INSN_UDB 0xd6 +#define LEN_UDB 1 + /* * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit. 
*/ @@ -26,7 +31,7 @@ #define BUG_UD2 0xfffe #define BUG_UD1 0xfffd #define BUG_UD1_UBSAN 0xfffc -#define BUG_EA 0xffea +#define BUG_UDB 0xffd6 #define BUG_LOCK 0xfff0 #ifdef CONFIG_GENERIC_BUG diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index 976b90a3d190..c40b9ebc1fb4 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h @@ -71,12 +71,10 @@ * * __cfi_foo: * endbr64 - * subl 0x12345678, %r10d - * jz foo - * ud2 - * nop + * subl 0x12345678, %eax + * jne.32,pn foo+3 * foo: - * osp nop3 # was endbr64 + * nopl -42(%rax) # was endbr64 * ... code here ... * ret * @@ -86,9 +84,9 @@ * indirect caller: * lea foo(%rip), %r11 * ... - * movl $0x12345678, %r10d - * subl $16, %r11 - * nop4 + * movl $0x12345678, %eax + * lea -0x10(%r11), %r11 + * nop5 * call *%r11 * */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index b61f32c3459f..a88b06f1c35e 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -166,8 +166,7 @@ extern void __add_wrong_size(void) { \ volatile u8 *__ptr = (volatile u8 *)(_ptr); \ asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "q" (__new) \ @@ -178,8 +177,7 @@ extern void __add_wrong_size(void) { \ volatile u16 *__ptr = (volatile u16 *)(_ptr); \ asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -190,8 +188,7 @@ extern void __add_wrong_size(void) { \ volatile u32 *__ptr = (volatile u32 *)(_ptr); \ asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ @@ -202,8 +199,7 @@ extern void __add_wrong_size(void) { \ volatile u64 *__ptr = (volatile u64 *)(_ptr); \ asm_inline 
volatile(lock "cmpxchgq %[new], %[ptr]" \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*__ptr), \ [old] "+a" (__old) \ : [new] "r" (__new) \ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 371f7906019e..1f80a62be969 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -46,8 +46,7 @@ static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new bool ret; \ \ asm_inline volatile(_lock "cmpxchg8b %[ptr]" \ - CC_SET(e) \ - : CC_OUT(e) (ret), \ + : "=@ccz" (ret), \ [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ : "b" (n.low), "c" (n.high) \ @@ -125,8 +124,7 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ - CC_SET(e) \ - : ALT_OUTPUT_SP(CC_OUT(e) (ret), \ + : ALT_OUTPUT_SP("=@ccz" (ret), \ "+a" (o.low), "+d" (o.high)) \ : "b" (n.low), "c" (n.high), \ [ptr] "S" (_ptr) \ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 71d1e72ed879..5afea056fb20 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -66,8 +66,7 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, bool ret; \ \ asm_inline volatile(_lock "cmpxchg16b %[ptr]" \ - CC_SET(e) \ - : CC_OUT(e) (ret), \ + : "=@ccz" (ret), \ [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ : "b" (n.low), "c" (n.high) \ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 28d845257303..5e45d6424722 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -59,10 +59,10 @@ static __always_inline __attribute_const__ u32 gen_endbr(void) static __always_inline __attribute_const__ u32 gen_endbr_poison(void) { /* - * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it - * will be unique to (former) ENDBR sites. 
+ * 4 byte NOP that isn't NOP4, such that it will be unique to (former) + * ENDBR sites. Additionally it carries UDB as immediate. */ - return 0x001f0f66; /* osp nopl (%rax) */ + return 0xd6401f0f; /* nopl -42(%rax) */ } static inline bool __is_endbr(u32 val) @@ -70,10 +70,6 @@ static inline bool __is_endbr(u32 val) if (val == gen_endbr_poison()) return true; - /* See cfi_fineibt_bhi_preamble() */ - if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5) - return true; - val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ return val == gen_endbr(); } diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index a4ec27c67988..abd637e54e94 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -460,17 +460,12 @@ __visible noinstr void func(struct pt_regs *regs, \ #endif void idt_install_sysvec(unsigned int n, const void *function); - -#ifdef CONFIG_X86_FRED void fred_install_sysvec(unsigned int vector, const idtentry_t function); -#else -static inline void fred_install_sysvec(unsigned int vector, const idtentry_t function) { } -#endif #define sysvec_install(vector, function) { \ - if (cpu_feature_enabled(X86_FEATURE_FRED)) \ + if (IS_ENABLED(CONFIG_X86_FRED)) \ fred_install_sysvec(vector, function); \ - else \ + if (!cpu_feature_enabled(X86_FEATURE_FRED)) \ idt_install_sysvec(vector, asm_##function); \ } diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index abc4659f5809..605abd02158d 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -6,6 +6,7 @@ #include <linux/nmi.h> #include <linux/msi.h> #include <linux/io.h> +#include <linux/static_call.h> #include <asm/nospec-branch.h> #include <asm/paravirt.h> #include <asm/msr.h> @@ -39,16 +40,21 @@ static inline unsigned char hv_get_nmi_reason(void) return 0; } -#if IS_ENABLED(CONFIG_HYPERV) -extern bool hyperv_paravisor_present; +extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); +extern u64 
hv_snp_hypercall(u64 control, u64 param1, u64 param2); +extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2); +#if IS_ENABLED(CONFIG_HYPERV) extern void *hv_hypercall_pg; extern union hv_ghcb * __percpu *hv_ghcb_pg; bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); -u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); + +#ifdef CONFIG_X86_64 +DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall); +#endif /* * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA @@ -65,37 +71,15 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; u64 output_address = output ? virt_to_phys(output) : 0; - u64 hv_status; #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) - return hv_tdx_hypercall(control, input_address, output_address); - - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__("mov %[output_address], %%r8\n" - "vmmcall" - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input_address) - : [output_address] "r" (output_address) - : "cc", "memory", "r8", "r9", "r10", "r11"); - return hv_status; - } - - if (!hv_hypercall_pg) - return U64_MAX; - - __asm__ __volatile__("mov %[output_address], %%r8\n" - CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input_address) - : [output_address] "r" (output_address), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "memory", "r8", "r9", "r10", "r11"); + return static_call_mod(hv_hypercall)(control, input_address, output_address); #else u32 input_address_hi = upper_32_bits(input_address); u32 input_address_lo = lower_32_bits(input_address); u32 output_address_hi = upper_32_bits(output_address); u32 output_address_lo = lower_32_bits(output_address); + u64 hv_status; if (!hv_hypercall_pg) return U64_MAX; @@ -108,48 +92,30 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) "D"(output_address_hi), 
"S"(output_address_lo), THUNK_TARGET(hv_hypercall_pg) : "cc", "memory"); -#endif /* !x86_64 */ return hv_status; +#endif /* !x86_64 */ } /* Fast hypercall with 8 bytes of input and no output */ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) { - u64 hv_status; - #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) - return hv_tdx_hypercall(control, input1, 0); - - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ __volatile__( - "vmmcall" - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - :: "cc", "r8", "r9", "r10", "r11"); - } else { - __asm__ __volatile__(CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : THUNK_TARGET(hv_hypercall_pg) - : "cc", "r8", "r9", "r10", "r11"); - } + return static_call_mod(hv_hypercall)(control, input1, 0); #else - { - u32 input1_hi = upper_32_bits(input1); - u32 input1_lo = lower_32_bits(input1); - - __asm__ __volatile__ (CALL_NOSPEC - : "=A"(hv_status), - "+c"(input1_lo), - ASM_CALL_CONSTRAINT - : "A" (control), - "b" (input1_hi), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "edi", "esi"); - } -#endif + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u64 hv_status; + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), + ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input1_hi), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "edi", "esi"); return hv_status; +#endif } static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) @@ -162,45 +128,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) /* Fast hypercall with 16 bytes of input */ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) { - u64 hv_status; - #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx() && !hyperv_paravisor_present) - return hv_tdx_hypercall(control, input1, input2); - - if (hv_isolation_type_snp() && !hyperv_paravisor_present) { - __asm__ 
__volatile__("mov %[input2], %%r8\n" - "vmmcall" - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : [input2] "r" (input2) - : "cc", "r8", "r9", "r10", "r11"); - } else { - __asm__ __volatile__("mov %[input2], %%r8\n" - CALL_NOSPEC - : "=a" (hv_status), ASM_CALL_CONSTRAINT, - "+c" (control), "+d" (input1) - : [input2] "r" (input2), - THUNK_TARGET(hv_hypercall_pg) - : "cc", "r8", "r9", "r10", "r11"); - } + return static_call_mod(hv_hypercall)(control, input1, input2); #else - { - u32 input1_hi = upper_32_bits(input1); - u32 input1_lo = lower_32_bits(input1); - u32 input2_hi = upper_32_bits(input2); - u32 input2_lo = lower_32_bits(input2); - - __asm__ __volatile__ (CALL_NOSPEC - : "=A"(hv_status), - "+c"(input1_lo), ASM_CALL_CONSTRAINT - : "A" (control), "b" (input1_hi), - "D"(input2_hi), "S"(input2_lo), - THUNK_TARGET(hv_hypercall_pg) - : "cc"); - } -#endif + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u32 input2_hi = upper_32_bits(input2); + u32 input2_lo = lower_32_bits(input2); + u64 hv_status; + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), ASM_CALL_CONSTRAINT + : "A" (control), "b" (input1_hi), + "D"(input2_hi), "S"(input2_lo), + THUNK_TARGET(hv_hypercall_pg) + : "cc"); return hv_status; +#endif } static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index c69e269937c5..76b95bd1a405 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.0+ */ /* Generic MTRR (Memory Type Range Register) ioctls. Copyright (C) 1997-1999 Richard Gooch - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. 
- - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - Richard Gooch may be reached by email at rgooch@atnf.csiro.au The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 6ca6516c7492..e4815e15dc9a 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -36,9 +36,7 @@ static __always_inline void __monitor(const void *eax, u32 ecx, u32 edx) static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) { - /* "monitorx %eax, %ecx, %edx" */ - asm volatile(".byte 0x0f, 0x01, 0xfa" - :: "a" (eax), "c" (ecx), "d"(edx)); + asm volatile("monitorx" :: "a" (eax), "c" (ecx), "d"(edx)); } static __always_inline void __mwait(u32 eax, u32 ecx) @@ -80,9 +78,7 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { /* No need for TSA buffer clearing on AMD */ - /* "mwaitx %eax, %ebx, %ecx" */ - asm volatile(".byte 0x0f, 0x01, 0xfb" - :: "a" (eax), "b" (ebx), "c" (ecx)); + asm volatile("mwaitx" :: "a" (eax), "b" (ebx), "c" (ecx)); } /* diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index b0d03b6c279b..332428caaed2 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -309,8 +309,7 @@ do { \ \ asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \ __percpu_arg([var]) \ - CC_SET(z) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [oval] "+a" (pco_old__), \ [var] "+m" (__my_cpu_var(_var)) \ : [nval] __pcpu_reg_##size(, pco_new__) \ @@ -367,8 +366,7 @@ do { \ asm_inline 
qual ( \ ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ - CC_SET(z) \ - : ALT_OUTPUT_SP(CC_OUT(z) (success), \ + : ALT_OUTPUT_SP("=@ccz" (success), \ [var] "+m" (__my_cpu_var(_var)), \ "+a" (old__.low), "+d" (old__.high)) \ : "b" (new__.low), "c" (new__.high), \ @@ -436,8 +434,7 @@ do { \ asm_inline qual ( \ ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ - CC_SET(z) \ - : ALT_OUTPUT_SP(CC_OUT(z) (success), \ + : ALT_OUTPUT_SP("=@ccz" (success), \ [var] "+m" (__my_cpu_var(_var)), \ "+a" (old__.low), "+d" (old__.high)) \ : "b" (new__.low), "c" (new__.high), \ @@ -585,8 +582,7 @@ do { \ bool oldbit; \ \ asm volatile("btl %[nr], " __percpu_arg([var]) \ - CC_SET(c) \ - : CC_OUT(c) (oldbit) \ + : "=@ccc" (oldbit) \ : [var] "m" (__my_cpu_var(_var)), \ [nr] "rI" (_nr)); \ oldbit; \ diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 3821ee3fae35..54c8fc430684 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -6,37 +6,15 @@ #define __CLOBBERS_MEM(clb...) "memory", ## clb -#ifndef __GCC_ASM_FLAG_OUTPUTS__ - -/* Use asm goto */ - -#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ -({ \ - bool c = false; \ - asm goto (fullop "; j" #cc " %l[cc_label]" \ - : : [var] "m" (_var), ## __VA_ARGS__ \ - : clobbers : cc_label); \ - if (0) { \ -cc_label: c = true; \ - } \ - c; \ -}) - -#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ - -/* Use flags output or a set instruction */ - #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) 
\ ({ \ bool c; \ - asm_inline volatile (fullop CC_SET(cc) \ - : [var] "+m" (_var), CC_OUT(cc) (c) \ + asm_inline volatile (fullop \ + : [var] "+m" (_var), "=@cc" #cc (c) \ : __VA_ARGS__ : clobbers); \ c; \ }) -#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) */ - #define GEN_UNARY_RMWcc_4(op, var, cc, arg0) \ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index f9046c4b9a2b..0e6c0940100f 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -491,8 +491,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) /* "pvalidate" mnemonic support in binutils 2.36 and newer */ asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t" - CC_SET(c) - : CC_OUT(c) (no_rmpupdate), "=a"(rc) + : "=@ccc"(no_rmpupdate), "=a"(rc) : "a"(vaddr), "c"(rmp_psize), "d"(validate) : "memory", "cc"); diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index c72d46175374..5c03aaa89014 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -83,8 +83,7 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { bool ret; - asm("btl %2,%1" CC_SET(c) - : CC_OUT(c) (ret) : "m"(*set), "Ir"(_sig-1)); + asm("btl %2,%1" : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1)); return ret; } diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index fde2bd7af19e..46aa2c9c1bda 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -75,9 +75,7 @@ static inline u32 rdpkru(void) * "rdpkru" instruction. Places PKRU contents in to EAX, * clears EDX and requires that ecx=0. */ - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (pkru), "=d" (edx) - : "c" (ecx)); + asm volatile("rdpkru" : "=a" (pkru), "=d" (edx) : "c" (ecx)); return pkru; } @@ -89,8 +87,7 @@ static inline void wrpkru(u32 pkru) * "wrpkru" instruction. 
Loads contents in EAX to PKRU, * requires that ecx = edx = 0. */ - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (pkru), "c"(ecx), "d"(edx)); + asm volatile("wrpkru" : : "a" (pkru), "c"(ecx), "d"(edx)); } #else @@ -287,8 +284,7 @@ static inline int enqcmds(void __iomem *dst, const void *src) * See movdir64b()'s comment on operand specification. */ asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90" - CC_SET(z) - : CC_OUT(z) (zf), "+m" (*__dst) + : "=@ccz" (zf), "+m" (*__dst) : "m" (*__src), "a" (__dst), "d" (__src)); /* Submission failure is indicated via EFLAGS.ZF=1 */ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 5337f1be18f6..f2d142a0a862 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -178,9 +178,9 @@ void int3_emulate_ret(struct pt_regs *regs) } static __always_inline -void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) +bool __emulate_cc(unsigned long flags, u8 cc) { - static const unsigned long jcc_mask[6] = { + static const unsigned long cc_mask[6] = { [0] = X86_EFLAGS_OF, [1] = X86_EFLAGS_CF, [2] = X86_EFLAGS_ZF, @@ -193,15 +193,21 @@ void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned lo bool match; if (cc < 0xc) { - match = regs->flags & jcc_mask[cc >> 1]; + match = flags & cc_mask[cc >> 1]; } else { - match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ - ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); + match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ + ((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); if (cc >= 0xe) - match = match || (regs->flags & X86_EFLAGS_ZF); + match = match || (flags & X86_EFLAGS_ZF); } - if ((match && !invert) || (!match && invert)) + return (match && !invert) || (!match && invert); +} + +static __always_inline +void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) +{ + if (__emulate_cc(regs->flags, 
cc)) ip += disp; int3_emulate_jmp(regs, ip); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 3a7755c1a441..91a3fb8ae7ff 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -378,7 +378,7 @@ do { \ asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [ptr] "+m" (*_ptr), \ [old] "+a" (__old) \ : [new] ltype (__new) \ @@ -397,7 +397,7 @@ do { \ asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ "+A" (__old), \ [ptr] "+m" (*_ptr) \ : "b" ((u32)__new), \ @@ -417,11 +417,10 @@ do { \ __typeof__(*(_ptr)) __new = (_new); \ asm volatile("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ - CC_SET(z) \ "2:\n" \ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ %[errout]) \ - : CC_OUT(z) (success), \ + : "=@ccz" (success), \ [errout] "+r" (__err), \ [ptr] "+m" (*_ptr), \ [old] "+a" (__old) \ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 79ae9cb50019..8ee5ff547357 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -147,10 +147,10 @@ static void *its_init_thunk(void *thunk, int reg) /* * When ITS uses indirect branch thunk the fineibt_paranoid * caller sequence doesn't fit in the caller site. So put the - * remaining part of the sequence (<ea> + JNE) into the ITS + * remaining part of the sequence (UDB + JNE) into the ITS * thunk. 
*/ - bytes[i++] = 0xea; /* invalid instruction */ + bytes[i++] = 0xd6; /* UDB */ bytes[i++] = 0x75; /* JNE */ bytes[i++] = 0xfd; @@ -163,7 +163,7 @@ static void *its_init_thunk(void *thunk, int reg) reg -= 8; } bytes[i++] = 0xff; - bytes[i++] = 0xe0 + reg; /* jmp *reg */ + bytes[i++] = 0xe0 + reg; /* JMP *reg */ bytes[i++] = 0xcc; return thunk + offset; @@ -713,20 +713,33 @@ static inline bool is_jcc32(struct insn *insn) #if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL) /* - * CALL/JMP *%\reg + * [CS]{,3} CALL/JMP *%\reg [INT3]* */ -static int emit_indirect(int op, int reg, u8 *bytes) +static int emit_indirect(int op, int reg, u8 *bytes, int len) { + int cs = 0, bp = 0; int i = 0; u8 modrm; + /* + * Set @len to the excess bytes after writing the instruction. + */ + len -= 2 + (reg >= 8); + WARN_ON_ONCE(len < 0); + switch (op) { case CALL_INSN_OPCODE: modrm = 0x10; /* Reg = 2; CALL r/m */ + /* + * Additional NOP is better than prefix decode penalty. + */ + if (len <= 3) + cs = len; break; case JMP32_INSN_OPCODE: modrm = 0x20; /* Reg = 4; JMP r/m */ + bp = len; break; default: @@ -734,6 +747,9 @@ static int emit_indirect(int op, int reg, u8 *bytes) return -1; } + while (cs--) + bytes[i++] = 0x2e; /* CS-prefix */ + if (reg >= 8) { bytes[i++] = 0x41; /* REX.B prefix */ reg -= 8; @@ -745,6 +761,9 @@ static int emit_indirect(int op, int reg, u8 *bytes) bytes[i++] = 0xff; /* opcode */ bytes[i++] = modrm; + while (bp--) + bytes[i++] = 0xcc; /* INT3 */ + return i; } @@ -918,20 +937,11 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) return emit_its_trampoline(addr, insn, reg, bytes); #endif - ret = emit_indirect(op, reg, bytes + i); + ret = emit_indirect(op, reg, bytes + i, insn->length - i); if (ret < 0) return ret; i += ret; - /* - * The compiler is supposed to EMIT an INT3 after every unconditional - * JMP instruction due to AMD BTC. 
However, if the compiler is too old - * or MITIGATION_SLS isn't enabled, we still need an INT3 after - * indirect JMPs even on Intel. - */ - if (op == JMP32_INSN_OPCODE && i < insn->length) - bytes[i++] = INT3_INSN_OPCODE; - for (; i < insn->length;) bytes[i++] = BYTES_NOP1; @@ -970,7 +980,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) case JMP32_INSN_OPCODE: /* Check for cfi_paranoid + ITS */ dest = addr + insn.length + insn.immediate.value; - if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) { + if (dest[-1] == 0xd6 && (dest[0] & 0xf0) == 0x70) { WARN_ON_ONCE(cfi_mode != CFI_FINEIBT); continue; } @@ -1177,6 +1187,7 @@ void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } #endif enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT; +static bool cfi_debug __ro_after_init; #ifdef CONFIG_FINEIBT_BHI bool cfi_bhi __ro_after_init = false; @@ -1259,6 +1270,8 @@ static __init int cfi_parse_cmdline(char *str) } else if (!strcmp(str, "off")) { cfi_mode = CFI_OFF; cfi_rand = false; + } else if (!strcmp(str, "debug")) { + cfi_debug = true; } else if (!strcmp(str, "kcfi")) { cfi_mode = CFI_KCFI; } else if (!strcmp(str, "fineibt")) { @@ -1266,26 +1279,26 @@ static __init int cfi_parse_cmdline(char *str) } else if (!strcmp(str, "norand")) { cfi_rand = false; } else if (!strcmp(str, "warn")) { - pr_alert("CFI mismatch non-fatal!\n"); + pr_alert("CFI: mismatch non-fatal!\n"); cfi_warn = true; } else if (!strcmp(str, "paranoid")) { if (cfi_mode == CFI_FINEIBT) { cfi_paranoid = true; } else { - pr_err("Ignoring paranoid; depends on fineibt.\n"); + pr_err("CFI: ignoring paranoid; depends on fineibt.\n"); } } else if (!strcmp(str, "bhi")) { #ifdef CONFIG_FINEIBT_BHI if (cfi_mode == CFI_FINEIBT) { cfi_bhi = true; } else { - pr_err("Ignoring bhi; depends on fineibt.\n"); + pr_err("CFI: ignoring bhi; depends on fineibt.\n"); } #else - pr_err("Ignoring bhi; depends on FINEIBT_BHI=y.\n"); + pr_err("CFI: ignoring bhi; depends on FINEIBT_BHI=y.\n"); 
#endif } else { - pr_err("Ignoring unknown cfi option (%s).", str); + pr_err("CFI: Ignoring unknown option (%s).", str); } str = next; @@ -1300,9 +1313,9 @@ early_param("cfi", cfi_parse_cmdline); * * __cfi_\func: __cfi_\func: * movl $0x12345678,%eax // 5 endbr64 // 4 - * nop subl $0x12345678,%r10d // 7 - * nop jne __cfi_\func+6 // 2 - * nop nop3 // 3 + * nop subl $0x12345678,%eax // 5 + * nop jne.d32,pn \func+3 // 7 + * nop * nop * nop * nop @@ -1311,34 +1324,44 @@ early_param("cfi", cfi_parse_cmdline); * nop * nop * nop + * \func: \func: + * endbr64 nopl -42(%rax) * * * caller: caller: - * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6 + * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%eax // 5 * addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4 - * je 1f // 2 nop4 // 4 + * je 1f // 2 nop5 // 5 * ud2 // 2 * 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6 * + * + * Notably, the FineIBT sequences are crafted such that branches are presumed + * non-taken. This is based on Agner Fog's optimization manual, which states: + * + * "Make conditional jumps most often not taken: The efficiency and throughput + * for not-taken branches is better than for taken branches on most + * processors. Therefore, it is good to place the most frequent branch first" */ /* * <fineibt_preamble_start>: * 0: f3 0f 1e fa endbr64 - * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d - * b: 75 f9 jne 6 <fineibt_preamble_start+0x6> - * d: 0f 1f 00 nopl (%rax) + * 4: 2d 78 56 34 12 sub $0x12345678, %eax + * 9: 2e 0f 85 03 00 00 00 jne,pn 13 <fineibt_preamble_start+0x13> + * 10: 0f 1f 40 d6 nopl -0x2a(%rax) * - * Note that the JNE target is the 0xEA byte inside the SUB, this decodes as - * (bad) on x86_64 and raises #UD. + * Note that the JNE target is the 0xD6 byte inside the NOPL, this decodes as + * UDB on x86_64 and raises #UD. 
*/ asm( ".pushsection .rodata \n" "fineibt_preamble_start: \n" " endbr64 \n" - " subl $0x12345678, %r10d \n" + " subl $0x12345678, %eax \n" "fineibt_preamble_bhi: \n" - " jne fineibt_preamble_start+6 \n" - ASM_NOP3 + " cs jne.d32 fineibt_preamble_start+0x13 \n" + "#fineibt_func: \n" + " nopl -42(%rax) \n" "fineibt_preamble_end: \n" ".popsection\n" ); @@ -1349,20 +1372,20 @@ extern u8 fineibt_preamble_end[]; #define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start) #define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start) -#define fineibt_preamble_ud 6 -#define fineibt_preamble_hash 7 +#define fineibt_preamble_ud 0x13 +#define fineibt_preamble_hash 5 /* * <fineibt_caller_start>: - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d - * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11 - * a: 0f 1f 40 00 nopl 0x0(%rax) + * 0: b8 78 56 34 12 mov $0x12345678, %eax + * 5: 4d 8d 5b f0 lea -0x10(%r11), %r11 + * 9: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) */ asm( ".pushsection .rodata \n" "fineibt_caller_start: \n" - " movl $0x12345678, %r10d \n" + " movl $0x12345678, %eax \n" " lea -0x10(%r11), %r11 \n" - ASM_NOP4 + ASM_NOP5 "fineibt_caller_end: \n" ".popsection \n" ); @@ -1371,7 +1394,7 @@ extern u8 fineibt_caller_start[]; extern u8 fineibt_caller_end[]; #define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start) -#define fineibt_caller_hash 2 +#define fineibt_caller_hash 1 #define fineibt_caller_jmp (fineibt_caller_size - 2) @@ -1388,9 +1411,9 @@ extern u8 fineibt_caller_end[]; * of adding a load. 
* * <fineibt_paranoid_start>: - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d - * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d - * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 + * 0: b8 78 56 34 12 mov $0x12345678, %eax + * 5: 41 3b 43 f5 cmp -0x11(%r11), %eax + * 9: 2e 4d 8d 5b <f0> cs lea -0x10(%r11), %r11 * e: 75 fd jne d <fineibt_paranoid_start+0xd> * 10: 41 ff d3 call *%r11 * 13: 90 nop @@ -1402,13 +1425,13 @@ extern u8 fineibt_caller_end[]; */ asm( ".pushsection .rodata \n" "fineibt_paranoid_start: \n" - " movl $0x12345678, %r10d \n" - " cmpl -9(%r11), %r10d \n" - " lea -0x10(%r11), %r11 \n" + " mov $0x12345678, %eax \n" + " cmpl -11(%r11), %eax \n" + " cs lea -0x10(%r11), %r11 \n" + "#fineibt_caller_size: \n" " jne fineibt_paranoid_start+0xd \n" "fineibt_paranoid_ind: \n" - " call *%r11 \n" - " nop \n" + " cs call *%r11 \n" "fineibt_paranoid_end: \n" ".popsection \n" ); @@ -1520,51 +1543,67 @@ static int cfi_rand_preamble(s32 *start, s32 *end) return 0; } +/* + * Inline the bhi-arity 1 case: + * + * __cfi_foo: + * 0: f3 0f 1e fa endbr64 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax + * 9: 49 0f 45 fa cmovne %rax, %rdi + * d: 2e 75 03 jne,pn foo+0x3 + * + * foo: + * 10: 0f 1f 40 <d6> nopl -42(%rax) + * + * Notably, this scheme is incompatible with permissive CFI + * because the CMOVcc is unconditional and RDI will have been + * clobbered. 
+ */ +asm( ".pushsection .rodata \n" + "fineibt_bhi1_start: \n" + " cmovne %rax, %rdi \n" + " cs jne fineibt_bhi1_func + 0x3 \n" + "fineibt_bhi1_func: \n" + " nopl -42(%rax) \n" + "fineibt_bhi1_end: \n" + ".popsection \n" +); + +extern u8 fineibt_bhi1_start[]; +extern u8 fineibt_bhi1_end[]; + +#define fineibt_bhi1_size (fineibt_bhi1_end - fineibt_bhi1_start) + static void cfi_fineibt_bhi_preamble(void *addr, int arity) { + u8 bytes[MAX_INSN_SIZE]; + if (!arity) return; if (!cfi_warn && arity == 1) { - /* - * Crazy scheme to allow arity-1 inline: - * - * __cfi_foo: - * 0: f3 0f 1e fa endbr64 - * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d - * b: 49 0f 45 fa cmovne %r10, %rdi - * f: 75 f5 jne __cfi_foo+6 - * 11: 0f 1f 00 nopl (%rax) - * - * Code that direct calls to foo()+0, decodes the tail end as: - * - * foo: - * 0: f5 cmc - * 1: 0f 1f 00 nopl (%rax) - * - * which clobbers CF, but does not affect anything ABI - * wise. - * - * Notably, this scheme is incompatible with permissive CFI - * because the CMOVcc is unconditional and RDI will have been - * clobbered. - */ - const u8 magic[9] = { - 0x49, 0x0f, 0x45, 0xfa, - 0x75, 0xf5, - BYTES_NOP3, - }; - - text_poke_early(addr + fineibt_preamble_bhi, magic, 9); - + text_poke_early(addr + fineibt_preamble_bhi, + fineibt_bhi1_start, fineibt_bhi1_size); return; } - text_poke_early(addr + fineibt_preamble_bhi, - text_gen_insn(CALL_INSN_OPCODE, - addr + fineibt_preamble_bhi, - __bhi_args[arity]), - CALL_INSN_SIZE); + /* + * Replace the bytes at fineibt_preamble_bhi with a CALL instruction + * that lines up exactly with the end of the preamble, such that the + * return address will be foo+0. 
+ * + * __cfi_foo: + * 0: f3 0f 1e fa endbr64 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax + * 9: 2e 2e e8 DD DD DD DD cs cs call __bhi_args[arity] + */ + bytes[0] = 0x2e; + bytes[1] = 0x2e; + __text_gen_insn(bytes + 2, CALL_INSN_OPCODE, + addr + fineibt_preamble_bhi + 2, + __bhi_args[arity], CALL_INSN_SIZE); + + text_poke_early(addr + fineibt_preamble_bhi, bytes, 7); } static int cfi_rewrite_preamble(s32 *start, s32 *end) @@ -1655,8 +1694,6 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) { s32 *s; - BUG_ON(fineibt_paranoid_size != 20); - for (s = start; s < end; s++) { void *addr = (void *)s + *s; struct insn insn; @@ -1696,8 +1733,9 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) emit_paranoid_trampoline(addr + fineibt_caller_size, &insn, 11, bytes + fineibt_caller_size); } else { - ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind); - if (WARN_ON_ONCE(ret != 3)) + int len = fineibt_paranoid_size - fineibt_paranoid_ind; + ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind, len); + if (WARN_ON_ONCE(ret != len)) continue; } @@ -1707,13 +1745,20 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) return 0; } +#define pr_cfi_debug(X...) if (cfi_debug) pr_info(X) + +#define FINEIBT_WARN(_f, _v) \ + WARN_ONCE((_f) != (_v), "FineIBT: " #_f " %ld != %d\n", _f, _v) + static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, s32 *start_cfi, s32 *end_cfi, bool builtin) { int ret; - if (WARN_ONCE(fineibt_preamble_size != 16, - "FineIBT preamble wrong size: %ld", fineibt_preamble_size)) + if (FINEIBT_WARN(fineibt_preamble_size, 20) || + FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) || + FINEIBT_WARN(fineibt_caller_size, 14) || + FINEIBT_WARN(fineibt_paranoid_size, 20)) return; if (cfi_mode == CFI_AUTO) { @@ -1734,6 +1779,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, * rewrite them. This disables all CFI. If this succeeds but any of the * later stages fails, we're without CFI. 
*/ + pr_cfi_debug("CFI: disabling all indirect call checking\n"); ret = cfi_disable_callers(start_retpoline, end_retpoline); if (ret) goto err; @@ -1744,43 +1790,53 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, cfi_bpf_hash = cfi_rehash(cfi_bpf_hash); cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash); } + pr_cfi_debug("CFI: cfi_seed: 0x%08x\n", cfi_seed); + pr_cfi_debug("CFI: rehashing all preambles\n"); ret = cfi_rand_preamble(start_cfi, end_cfi); if (ret) goto err; + pr_cfi_debug("CFI: rehashing all indirect calls\n"); ret = cfi_rand_callers(start_retpoline, end_retpoline); if (ret) goto err; + } else { + pr_cfi_debug("CFI: rehashing disabled\n"); } switch (cfi_mode) { case CFI_OFF: if (builtin) - pr_info("Disabling CFI\n"); + pr_info("CFI: disabled\n"); return; case CFI_KCFI: + pr_cfi_debug("CFI: re-enabling all indirect call checking\n"); ret = cfi_enable_callers(start_retpoline, end_retpoline); if (ret) goto err; if (builtin) - pr_info("Using kCFI\n"); + pr_info("CFI: Using %sretpoline kCFI\n", + cfi_rand ? 
"rehashed " : ""); return; case CFI_FINEIBT: + pr_cfi_debug("CFI: adding FineIBT to all preambles\n"); /* place the FineIBT preamble at func()-16 */ ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; /* rewrite the callers to target func()-16 */ + pr_cfi_debug("CFI: rewriting indirect call sites to use FineIBT\n"); ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err; /* now that nobody targets func()+0, remove ENDBR there */ + pr_cfi_debug("CFI: removing old endbr insns\n"); cfi_rewrite_endbr(start_cfi, end_cfi); if (builtin) { @@ -1823,11 +1879,11 @@ static void poison_cfi(void *addr) /* * __cfi_\func: - * osp nopl (%rax) - * subl $0, %r10d - * jz 1f - * ud2 - * 1: nop + * nopl -42(%rax) + * sub $0, %eax + * jne \func+3 + * \func: + * nopl -42(%rax) */ poison_endbr(addr); poison_hash(addr + fineibt_preamble_hash); @@ -1853,12 +1909,14 @@ static void poison_cfi(void *addr) } } +#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE) + /* - * When regs->ip points to a 0xEA byte in the FineIBT preamble, + * When regs->ip points to a 0xD6 byte in the FineIBT preamble, * return true and fill out target and type. * * We check the preamble by checking for the ENDBR instruction relative to the - * 0xEA instruction. + * UDB instruction. 
*/ static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type) { @@ -1868,10 +1926,10 @@ static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, if (!exact_endbr((void *)addr)) return false; - *target = addr + fineibt_preamble_size; + *target = addr + fineibt_prefix_size; __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); - *type = (u32)regs->r10 + hash; + *type = (u32)regs->ax + hash; /* * Since regs->ip points to the middle of an instruction; it cannot @@ -1909,12 +1967,12 @@ static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32 __get_kernel_nofault(&addr, regs->sp, unsigned long, Efault); *target = addr; - addr -= fineibt_preamble_size; + addr -= fineibt_prefix_size; if (!exact_endbr((void *)addr)) return false; __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); - *type = (u32)regs->r10 + hash; + *type = (u32)regs->ax + hash; /* * The UD2 sites are constructed with a RET immediately following, @@ -1931,7 +1989,7 @@ static bool is_paranoid_thunk(unsigned long addr) u32 thunk; __get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault); - return (thunk & 0x00FFFFFF) == 0xfd75ea; + return (thunk & 0x00FFFFFF) == 0xfd75d6; Efault: return false; @@ -1939,8 +1997,7 @@ Efault: /* * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[] - * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS - * thunk. + * sequence, or to UDB + Jcc.d8 for cfi_paranoid + ITS thunk. 
*/ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type) { @@ -1950,8 +2007,8 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, return false; if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) { - *target = regs->r11 + fineibt_preamble_size; - *type = regs->r10; + *target = regs->r11 + fineibt_prefix_size; + *type = regs->ax; /* * Since the trapping instruction is the exact, but LOCK prefixed, @@ -1963,14 +2020,14 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, /* * The cfi_paranoid + ITS thunk combination results in: * - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d - * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d - * a: 4d 8d 5b f0 lea -0x10(%r11), %r11 + * 0: b8 78 56 34 12 mov $0x12345678, %eax + * 5: 41 3b 43 f7 cmp -11(%r11), %eax + * a: 2e 3d 8d 5b f0 cs lea -0x10(%r11), %r11 * e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11 * * Where the paranoid_thunk looks like: * - * 1d: <ea> (bad) + * 1d: <d6> udb * __x86_indirect_paranoid_thunk_r11: * 1e: 75 fd jne 1d * __x86_indirect_its_thunk_r11: @@ -1979,8 +2036,8 @@ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, * */ if (is_paranoid_thunk(regs->ip)) { - *target = regs->r11 + fineibt_preamble_size; - *type = regs->r10; + *target = regs->r11 + fineibt_prefix_size; + *type = regs->ax; regs->ip = *target; return true; @@ -2005,6 +2062,8 @@ bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type) static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, s32 *start_cfi, s32 *end_cfi, bool builtin) { + if (IS_ENABLED(CONFIG_CFI) && builtin) + pr_info("CFI: Using standard kCFI\n"); } #ifdef CONFIG_X86_KERNEL_IBT @@ -2321,6 +2380,7 @@ void __init alternative_instructions(void) __apply_fineibt(__retpoline_sites, __retpoline_sites_end, __cfi_sites, __cfi_sites_end, true); + cfi_debug = false; /* * Rewrite the retpolines, must be done 
before alternatives since diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 6259b474073b..32ba599a51f8 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -102,6 +102,7 @@ static void __used common(void) BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + OFFSET(C_PTREGS_SIZE, pt_regs, orig_ax); /* TLB state for the entry code */ OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask); diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c index 77086cf565ec..638eb5c933e0 100644 --- a/arch/x86/kernel/cfi.c +++ b/arch/x86/kernel/cfi.c @@ -27,7 +27,7 @@ static bool decode_cfi_insn(struct pt_regs *regs, unsigned long *target, * for indirect call checks: * * movl -<id>, %r10d ; 6 bytes - * addl -4(%reg), %r10d ; 4 bytes + * addl -<pos>(%reg), %r10d; 4 bytes * je .Ltmp1 ; 2 bytes * ud2 ; <- regs->ip * .Ltmp1: diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 25773af116bc..c4febdbcfe4d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -38,10 +38,6 @@ bool hv_nested; struct ms_hyperv_info ms_hyperv; -/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */ -bool hyperv_paravisor_present __ro_after_init; -EXPORT_SYMBOL_GPL(hyperv_paravisor_present); - #if IS_ENABLED(CONFIG_HYPERV) static inline unsigned int hv_get_nested_msr(unsigned int reg) { @@ -288,8 +284,18 @@ static void __init x86_setup_ops_for_tsc_pg_clock(void) old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; } + +#ifdef CONFIG_X86_64 +DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall); +EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall); +#define hypercall_update(hc) static_call_update(hv_hypercall, hc) +#endif #endif /* CONFIG_HYPERV */ +#ifndef hypercall_update +#define hypercall_update(hc) (void)hc +#endif + static uint32_t __init 
ms_hyperv_platform(void) { u32 eax; @@ -484,14 +490,14 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.shared_gpa_boundary = BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); - hyperv_paravisor_present = !!ms_hyperv.paravisor_present; - pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { static_branch_enable(&isolation_type_snp); + if (!ms_hyperv.paravisor_present) + hypercall_update(hv_snp_hypercall); } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { static_branch_enable(&isolation_type_tdx); @@ -499,6 +505,7 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; if (!ms_hyperv.paravisor_present) { + hypercall_update(hv_tdx_hypercall); /* * Mark the Hyper-V TSC page feature as disabled * in a TDX VM without paravisor so that the diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 18cf79d6e2c5..763534d77f59 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.0+ /* * MTRR (Memory Type Range Register) cleanup * * Copyright (C) 2009 Yinghai Lu - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/init.h> #include <linux/pci.h> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index ecbda0341a8a..4b3d492afe17 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -1,22 +1,9 @@ +// SPDX-License-Identifier: LGPL-2.0+ /* Generic MTRR (Memory Type Range Register) driver. Copyright (C) 1997-2000 Richard Gooch Copyright (c) 2002 Patrick Mochel - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - Richard Gooch may be reached by email at rgooch@atnf.csiro.au The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. 
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h index 99004b02e2ed..42a088a337c5 100644 --- a/arch/x86/kernel/cpu/sgx/encls.h +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -68,7 +68,7 @@ static inline bool encls_failed(int ret) ({ \ int ret; \ asm volatile( \ - "1: .byte 0x0f, 0x01, 0xcf;\n\t" \ + "1: encls\n" \ "2:\n" \ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \ : "=a"(ret) \ @@ -111,8 +111,8 @@ static inline bool encls_failed(int ret) ({ \ int ret; \ asm volatile( \ - "1: .byte 0x0f, 0x01, 0xcf;\n\t" \ - " xor %%eax,%%eax;\n" \ + "1: encls\n\t" \ + "xor %%eax,%%eax\n" \ "2:\n" \ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \ : "=a"(ret), "=b"(rbx_out) \ diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f79c5edc0b89..6ab9eac64670 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -97,9 +97,11 @@ void __init native_init_IRQ(void) /* Execute any quirks before the call gates are initialised: */ x86_init.irqs.pre_vector_init(); - if (cpu_feature_enabled(X86_FEATURE_FRED)) + /* FRED's IRQ path may be used even if FRED isn't fully enabled. */ + if (IS_ENABLED(CONFIG_X86_FRED)) fred_complete_exception_setup(); - else + + if (!cpu_feature_enabled(X86_FEATURE_FRED)) idt_setup_apic_and_irq_gates(); lapic_assign_system_vectors(); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 15088d14904f..201137b98fb8 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -479,6 +479,10 @@ void __nocfi machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } +/* + * Handover to the next kernel, no CFI concern. 
+ */ +ANNOTATE_NOCFI_SYM(machine_kexec); /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 36354b470590..6b22611e69cc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -97,7 +97,7 @@ __always_inline int is_valid_bugaddr(unsigned long addr) * Check for UD1 or UD2, accounting for Address Size Override Prefixes. * If it's a UD1, further decode to determine its use: * - * FineIBT: ea (bad) + * FineIBT: d6 udb * FineIBT: f0 75 f9 lock jne . - 6 * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax @@ -130,9 +130,9 @@ __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len) WARN_ON_ONCE(!lock); return BUG_LOCK; - case 0xea: + case 0xd6: *len = addr - start; - return BUG_EA; + return BUG_UDB; case OPCODE_ESCAPE: break; @@ -341,7 +341,7 @@ static noinstr bool handle_bug(struct pt_regs *regs) } fallthrough; - case BUG_EA: + case BUG_UDB: case BUG_LOCK: if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { handled = true; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 67d4f23bab66..278f08194ec8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -96,6 +96,7 @@ config KVM_SW_PROTECTED_VM config KVM_INTEL tristate "KVM for Intel (and compatible) processors support" depends on KVM && IA32_FEAT_CTL + select X86_FRED if X86_64 help Provides support for KVM on processors equipped with Intel's VT extensions, a.k.a. Virtual Machine Extensions (VMX). diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 59f93f68718a..4e3da5b497b8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -26,6 +26,7 @@ #include <asm/debugreg.h> #include <asm/nospec-branch.h> #include <asm/ibt.h> +#include <asm/text-patching.h> #include "x86.h" #include "tss.h" @@ -166,7 +167,6 @@ #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. 
MOVDQU) */ #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */ #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ -#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ #define NoWrite ((u64)1 << 45) /* No writeback */ #define SrcWrite ((u64)1 << 46) /* Write back src operand */ #define NoMod ((u64)1 << 47) /* Mod field is ignored */ @@ -203,7 +203,6 @@ struct opcode { const struct escape *esc; const struct instr_dual *idual; const struct mode_dual *mdual; - void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -267,186 +266,130 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) X86_EFLAGS_PF|X86_EFLAGS_CF) #ifdef CONFIG_X86_64 -#define ON64(x) x +#define ON64(x...) x #else -#define ON64(x) +#define ON64(x...) #endif -/* - * fastop functions have a special calling convention: - * - * dst: rax (in/out) - * src: rdx (in/out) - * src2: rcx (in) - * flags: rflags (in/out) - * ex: rsi (in:fastop pointer, out:zero if exception) - * - * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for - * different operand sizes can be reached by calculation, rather than a jump - * table (which would be bigger than the code). - * - * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR - * and 1 for the straight line speculation INT3, leaves 7 bytes for the - * body of the function. Currently none is larger than 4. 
- */ -static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); - -#define FASTOP_SIZE 16 - -#define __FOP_FUNC(name) \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ - ".type " name ", @function \n\t" \ - name ":\n\t" \ - ASM_ENDBR \ - IBT_NOSEAL(name) - -#define FOP_FUNC(name) \ - __FOP_FUNC(#name) - -#define __FOP_RET(name) \ - "11: " ASM_RET \ - ".size " name ", .-" name "\n\t" - -#define FOP_RET(name) \ - __FOP_RET(#name) - -#define __FOP_START(op, align) \ - extern void em_##op(struct fastop *fake); \ - asm(".pushsection .text, \"ax\" \n\t" \ - ".global em_" #op " \n\t" \ - ".align " __stringify(align) " \n\t" \ - "em_" #op ":\n\t" - -#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) - -#define FOP_END \ - ".popsection") - -#define __FOPNOP(name) \ - __FOP_FUNC(name) \ - __FOP_RET(name) - -#define FOPNOP() \ - __FOPNOP(__stringify(__UNIQUE_ID(nop))) - -#define FOP1E(op, dst) \ - __FOP_FUNC(#op "_" #dst) \ - "10: " #op " %" #dst " \n\t" \ - __FOP_RET(#op "_" #dst) - -#define FOP1EEX(op, dst) \ - FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi) - -#define FASTOP1(op) \ - FOP_START(op) \ - FOP1E(op##b, al) \ - FOP1E(op##w, ax) \ - FOP1E(op##l, eax) \ - ON64(FOP1E(op##q, rax)) \ - FOP_END - -/* 1-operand, using src2 (for MUL/DIV r/m) */ -#define FASTOP1SRC2(op, name) \ - FOP_START(name) \ - FOP1E(op, cl) \ - FOP1E(op, cx) \ - FOP1E(op, ecx) \ - ON64(FOP1E(op, rcx)) \ - FOP_END - -/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */ -#define FASTOP1SRC2EX(op, name) \ - FOP_START(name) \ - FOP1EEX(op, cl) \ - FOP1EEX(op, cx) \ - FOP1EEX(op, ecx) \ - ON64(FOP1EEX(op, rcx)) \ - FOP_END - -#define FOP2E(op, dst, src) \ - __FOP_FUNC(#op "_" #dst "_" #src) \ - #op " %" #src ", %" #dst " \n\t" \ - __FOP_RET(#op "_" #dst "_" #src) - -#define FASTOP2(op) \ - FOP_START(op) \ - FOP2E(op##b, al, dl) \ - FOP2E(op##w, ax, dx) \ - FOP2E(op##l, eax, edx) \ - ON64(FOP2E(op##q, rax, rdx)) \ - FOP_END - -/* 2 operand, word only */ -#define 
FASTOP2W(op) \ - FOP_START(op) \ - FOPNOP() \ - FOP2E(op##w, ax, dx) \ - FOP2E(op##l, eax, edx) \ - ON64(FOP2E(op##q, rax, rdx)) \ - FOP_END - -/* 2 operand, src is CL */ -#define FASTOP2CL(op) \ - FOP_START(op) \ - FOP2E(op##b, al, cl) \ - FOP2E(op##w, ax, cl) \ - FOP2E(op##l, eax, cl) \ - ON64(FOP2E(op##q, rax, cl)) \ - FOP_END - -/* 2 operand, src and dest are reversed */ -#define FASTOP2R(op, name) \ - FOP_START(name) \ - FOP2E(op##b, dl, al) \ - FOP2E(op##w, dx, ax) \ - FOP2E(op##l, edx, eax) \ - ON64(FOP2E(op##q, rdx, rax)) \ - FOP_END - -#define FOP3E(op, dst, src, src2) \ - __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \ - #op " %" #src2 ", %" #src ", %" #dst " \n\t"\ - __FOP_RET(#op "_" #dst "_" #src "_" #src2) - -/* 3-operand, word-only, src2=cl */ -#define FASTOP3WCL(op) \ - FOP_START(op) \ - FOPNOP() \ - FOP3E(op##w, ax, dx, cl) \ - FOP3E(op##l, eax, edx, cl) \ - ON64(FOP3E(op##q, rax, rdx, cl)) \ - FOP_END - -/* Special case for SETcc - 1 instruction per cc */ -#define FOP_SETCC(op) \ - FOP_FUNC(op) \ - #op " %al \n\t" \ - FOP_RET(op) - -FOP_START(setcc) -FOP_SETCC(seto) -FOP_SETCC(setno) -FOP_SETCC(setc) -FOP_SETCC(setnc) -FOP_SETCC(setz) -FOP_SETCC(setnz) -FOP_SETCC(setbe) -FOP_SETCC(setnbe) -FOP_SETCC(sets) -FOP_SETCC(setns) -FOP_SETCC(setp) -FOP_SETCC(setnp) -FOP_SETCC(setl) -FOP_SETCC(setnl) -FOP_SETCC(setle) -FOP_SETCC(setnle) -FOP_END; - -FOP_START(salc) -FOP_FUNC(salc) -"pushf; sbb %al, %al; popf \n\t" -FOP_RET(salc) -FOP_END; +#define EM_ASM_START(op) \ +static int em_##op(struct x86_emulate_ctxt *ctxt) \ +{ \ + unsigned long flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; \ + int bytes = 1, ok = 1; \ + if (!(ctxt->d & ByteOp)) \ + bytes = ctxt->dst.bytes; \ + switch (bytes) { + +#define __EM_ASM(str) \ + asm("push %[flags]; popf \n\t" \ + "10: " str \ + "pushf; pop %[flags] \n\t" \ + "11: \n\t" \ + : "+a" (ctxt->dst.val), \ + "+d" (ctxt->src.val), \ + [flags] "+D" (flags), \ + "+S" (ok) \ + : "c" (ctxt->src2.val)) + +#define 
__EM_ASM_1(op, dst) \ + __EM_ASM(#op " %%" #dst " \n\t") + +#define __EM_ASM_1_EX(op, dst) \ + __EM_ASM(#op " %%" #dst " \n\t" \ + _ASM_EXTABLE_TYPE_REG(10b, 11f, EX_TYPE_ZERO_REG, %%esi)) + +#define __EM_ASM_2(op, dst, src) \ + __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") + +#define __EM_ASM_3(op, dst, src, src2) \ + __EM_ASM(#op " %%" #src2 ", %%" #src ", %%" #dst " \n\t") + +#define EM_ASM_END \ + } \ + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); \ + return !ok ? emulate_de(ctxt) : X86EMUL_CONTINUE; \ +} + +/* 1-operand, using "a" (dst) */ +#define EM_ASM_1(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_1(op##b, al); break; \ + case 2: __EM_ASM_1(op##w, ax); break; \ + case 4: __EM_ASM_1(op##l, eax); break; \ + ON64(case 8: __EM_ASM_1(op##q, rax); break;) \ + EM_ASM_END + +/* 1-operand, using "c" (src2) */ +#define EM_ASM_1SRC2(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_1(op##b, cl); break; \ + case 2: __EM_ASM_1(op##w, cx); break; \ + case 4: __EM_ASM_1(op##l, ecx); break; \ + ON64(case 8: __EM_ASM_1(op##q, rcx); break;) \ + EM_ASM_END + +/* 1-operand, using "c" (src2) with exception */ +#define EM_ASM_1SRC2EX(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_1_EX(op##b, cl); break; \ + case 2: __EM_ASM_1_EX(op##w, cx); break; \ + case 4: __EM_ASM_1_EX(op##l, ecx); break; \ + ON64(case 8: __EM_ASM_1_EX(op##q, rcx); break;) \ + EM_ASM_END + +/* 2-operand, using "a" (dst), "d" (src) */ +#define EM_ASM_2(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_2(op##b, al, dl); break; \ + case 2: __EM_ASM_2(op##w, ax, dx); break; \ + case 4: __EM_ASM_2(op##l, eax, edx); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ + EM_ASM_END + +/* 2-operand, reversed */ +#define EM_ASM_2R(op, name) \ + EM_ASM_START(name) \ + case 1: __EM_ASM_2(op##b, dl, al); break; \ + case 2: __EM_ASM_2(op##w, dx, ax); break; \ + case 4: __EM_ASM_2(op##l, edx, eax); break; \ + ON64(case 8: __EM_ASM_2(op##q, rdx, rax); break;) \ + EM_ASM_END + +/* 
2-operand, word only (no byte op) */ +#define EM_ASM_2W(op) \ + EM_ASM_START(op) \ + case 1: break; \ + case 2: __EM_ASM_2(op##w, ax, dx); break; \ + case 4: __EM_ASM_2(op##l, eax, edx); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, rdx); break;) \ + EM_ASM_END + +/* 2-operand, using "a" (dst) and CL (src2) */ +#define EM_ASM_2CL(op) \ + EM_ASM_START(op) \ + case 1: __EM_ASM_2(op##b, al, cl); break; \ + case 2: __EM_ASM_2(op##w, ax, cl); break; \ + case 4: __EM_ASM_2(op##l, eax, cl); break; \ + ON64(case 8: __EM_ASM_2(op##q, rax, cl); break;) \ + EM_ASM_END + +/* 3-operand, using "a" (dst), "d" (src) and CL (src2) */ +#define EM_ASM_3WCL(op) \ + EM_ASM_START(op) \ + case 1: break; \ + case 2: __EM_ASM_3(op##w, ax, dx, cl); break; \ + case 4: __EM_ASM_3(op##l, eax, edx, cl); break; \ + ON64(case 8: __EM_ASM_3(op##q, rax, rdx, cl); break;) \ + EM_ASM_END + +static int em_salc(struct x86_emulate_ctxt *ctxt) +{ + /* + * Set AL 0xFF if CF is set, or 0x00 when clear. + */ + ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF); + return X86EMUL_CONTINUE; +} /* * XXX: inoutclob user must know where the argument is being expanded. 
@@ -1007,56 +950,55 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, return rc; } -FASTOP2(add); -FASTOP2(or); -FASTOP2(adc); -FASTOP2(sbb); -FASTOP2(and); -FASTOP2(sub); -FASTOP2(xor); -FASTOP2(cmp); -FASTOP2(test); - -FASTOP1SRC2(mul, mul_ex); -FASTOP1SRC2(imul, imul_ex); -FASTOP1SRC2EX(div, div_ex); -FASTOP1SRC2EX(idiv, idiv_ex); - -FASTOP3WCL(shld); -FASTOP3WCL(shrd); - -FASTOP2W(imul); - -FASTOP1(not); -FASTOP1(neg); -FASTOP1(inc); -FASTOP1(dec); - -FASTOP2CL(rol); -FASTOP2CL(ror); -FASTOP2CL(rcl); -FASTOP2CL(rcr); -FASTOP2CL(shl); -FASTOP2CL(shr); -FASTOP2CL(sar); - -FASTOP2W(bsf); -FASTOP2W(bsr); -FASTOP2W(bt); -FASTOP2W(bts); -FASTOP2W(btr); -FASTOP2W(btc); - -FASTOP2(xadd); - -FASTOP2R(cmp, cmp_r); +EM_ASM_2(add); +EM_ASM_2(or); +EM_ASM_2(adc); +EM_ASM_2(sbb); +EM_ASM_2(and); +EM_ASM_2(sub); +EM_ASM_2(xor); +EM_ASM_2(cmp); +EM_ASM_2(test); +EM_ASM_2(xadd); + +EM_ASM_1SRC2(mul, mul_ex); +EM_ASM_1SRC2(imul, imul_ex); +EM_ASM_1SRC2EX(div, div_ex); +EM_ASM_1SRC2EX(idiv, idiv_ex); + +EM_ASM_3WCL(shld); +EM_ASM_3WCL(shrd); + +EM_ASM_2W(imul); + +EM_ASM_1(not); +EM_ASM_1(neg); +EM_ASM_1(inc); +EM_ASM_1(dec); + +EM_ASM_2CL(rol); +EM_ASM_2CL(ror); +EM_ASM_2CL(rcl); +EM_ASM_2CL(rcr); +EM_ASM_2CL(shl); +EM_ASM_2CL(shr); +EM_ASM_2CL(sar); + +EM_ASM_2W(bsf); +EM_ASM_2W(bsr); +EM_ASM_2W(bt); +EM_ASM_2W(bts); +EM_ASM_2W(btr); +EM_ASM_2W(btc); + +EM_ASM_2R(cmp, cmp_r); static int em_bsf_c(struct x86_emulate_ctxt *ctxt) { /* If src is zero, do not writeback, but update flags */ if (ctxt->src.val == 0) ctxt->dst.type = OP_NONE; - return fastop(ctxt, em_bsf); + return em_bsf(ctxt); } static int em_bsr_c(struct x86_emulate_ctxt *ctxt) @@ -1064,18 +1006,12 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) /* If src is zero, do not writeback, but update flags */ if (ctxt->src.val == 0) ctxt->dst.type = OP_NONE; - return fastop(ctxt, em_bsr); + return em_bsr(ctxt); } static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { - u8 rc; - void 
(*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf); - - flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push %[flags]; popf; " CALL_NOSPEC - : "=a"(rc), ASM_CALL_CONSTRAINT : [thunk_target]"r"(fop), [flags]"r"(flags)); - return rc; + return __emulate_cc(flags, condition & 0xf); } static void fetch_register_operand(struct operand *op) @@ -2325,7 +2261,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX); ctxt->src.orig_val = ctxt->src.val; ctxt->src.val = ctxt->dst.orig_val; - fastop(ctxt, em_cmp); + em_cmp(ctxt); if (ctxt->eflags & X86_EFLAGS_ZF) { /* Success: write back to memory; no update of EAX */ @@ -3090,7 +3026,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); if (cf) ctxt->eflags |= X86_EFLAGS_CF; @@ -3116,7 +3052,7 @@ static int em_aam(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); return X86EMUL_CONTINUE; } @@ -3134,7 +3070,7 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - fastop(ctxt, em_or); + em_or(ctxt); return X86EMUL_CONTINUE; } @@ -3225,7 +3161,7 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) static int em_imul_3op(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = ctxt->src2.val; - return fastop(ctxt, em_imul); + return em_imul(ctxt); } static int em_cwd(struct x86_emulate_ctxt *ctxt) @@ -4004,7 +3940,6 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } -#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } #define II(_f, _e, _i) \ { .flags = (_f)|Intercept, 
.u.execute = (_e), .intercept = x86_intercept_##_i } #define IIP(_f, _e, _i, _p) \ @@ -4019,9 +3954,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define I2bvIP(_f, _e, _i, _p) \ IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) -#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \ - F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ - F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) static const struct opcode group7_rm0[] = { N, @@ -4059,14 +3994,14 @@ static const struct opcode group7_rm7[] = { }; static const struct opcode group1[] = { - F(Lock, em_add), - F(Lock | PageTable, em_or), - F(Lock, em_adc), - F(Lock, em_sbb), - F(Lock | PageTable, em_and), - F(Lock, em_sub), - F(Lock, em_xor), - F(NoWrite, em_cmp), + I(Lock, em_add), + I(Lock | PageTable, em_or), + I(Lock, em_adc), + I(Lock, em_sbb), + I(Lock | PageTable, em_and), + I(Lock, em_sub), + I(Lock, em_xor), + I(NoWrite, em_cmp), }; static const struct opcode group1A[] = { @@ -4074,36 +4009,36 @@ static const struct opcode group1A[] = { }; static const struct opcode group2[] = { - F(DstMem | ModRM, em_rol), - F(DstMem | ModRM, em_ror), - F(DstMem | ModRM, em_rcl), - F(DstMem | ModRM, em_rcr), - F(DstMem | ModRM, em_shl), - F(DstMem | ModRM, em_shr), - F(DstMem | ModRM, em_shl), - F(DstMem | ModRM, em_sar), + I(DstMem | ModRM, em_rol), + I(DstMem | ModRM, em_ror), + I(DstMem | ModRM, em_rcl), + I(DstMem | ModRM, em_rcr), + I(DstMem | ModRM, em_shl), + I(DstMem | ModRM, em_shr), + I(DstMem | ModRM, em_shl), + I(DstMem | ModRM, em_sar), }; static const struct opcode group3[] = { - F(DstMem | SrcImm | NoWrite, em_test), - F(DstMem | SrcImm | NoWrite, em_test), - F(DstMem | SrcNone | Lock, em_not), - F(DstMem | SrcNone | Lock, em_neg), - F(DstXacc | Src2Mem, em_mul_ex), - F(DstXacc | Src2Mem, em_imul_ex), - F(DstXacc | 
Src2Mem, em_div_ex), - F(DstXacc | Src2Mem, em_idiv_ex), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcNone | Lock, em_not), + I(DstMem | SrcNone | Lock, em_neg), + I(DstXacc | Src2Mem, em_mul_ex), + I(DstXacc | Src2Mem, em_imul_ex), + I(DstXacc | Src2Mem, em_div_ex), + I(DstXacc | Src2Mem, em_idiv_ex), }; static const struct opcode group4[] = { - F(ByteOp | DstMem | SrcNone | Lock, em_inc), - F(ByteOp | DstMem | SrcNone | Lock, em_dec), + I(ByteOp | DstMem | SrcNone | Lock, em_inc), + I(ByteOp | DstMem | SrcNone | Lock, em_dec), N, N, N, N, N, N, }; static const struct opcode group5[] = { - F(DstMem | SrcNone | Lock, em_inc), - F(DstMem | SrcNone | Lock, em_dec), + I(DstMem | SrcNone | Lock, em_inc), + I(DstMem | SrcNone | Lock, em_dec), I(SrcMem | NearBranch | IsBranch | ShadowStack, em_call_near_abs), I(SrcMemFAddr | ImplicitOps | IsBranch | ShadowStack, em_call_far), I(SrcMem | NearBranch | IsBranch, em_jmp_abs), @@ -4139,10 +4074,10 @@ static const struct group_dual group7 = { { static const struct opcode group8[] = { N, N, N, N, - F(DstMem | SrcImmByte | NoWrite, em_bt), - F(DstMem | SrcImmByte | Lock | PageTable, em_bts), - F(DstMem | SrcImmByte | Lock, em_btr), - F(DstMem | SrcImmByte | Lock | PageTable, em_btc), + I(DstMem | SrcImmByte | NoWrite, em_bt), + I(DstMem | SrcImmByte | Lock | PageTable, em_bts), + I(DstMem | SrcImmByte | Lock, em_btr), + I(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; /* @@ -4279,31 +4214,31 @@ static const struct instr_dual instr_dual_8d = { static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - F6ALU(Lock, em_add), + I6ALU(Lock, em_add), I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), /* 0x08 - 0x0F */ - F6ALU(Lock | PageTable, em_or), + I6ALU(Lock | PageTable, em_or), I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), N, /* 0x10 - 0x17 */ - F6ALU(Lock, em_adc), + I6ALU(Lock, em_adc), 
I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), /* 0x18 - 0x1F */ - F6ALU(Lock, em_sbb), + I6ALU(Lock, em_sbb), I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), /* 0x20 - 0x27 */ - F6ALU(Lock | PageTable, em_and), N, N, + I6ALU(Lock | PageTable, em_and), N, N, /* 0x28 - 0x2F */ - F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - F6ALU(Lock, em_xor), N, N, + I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - F6ALU(NoWrite, em_cmp), N, N, + I6ALU(NoWrite, em_cmp), N, N, /* 0x40 - 0x4F */ - X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)), + X8(I(DstReg, em_inc)), X8(I(DstReg, em_dec)), /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ @@ -4327,7 +4262,7 @@ static const struct opcode opcode_table[256] = { G(DstMem | SrcImm, group1), G(ByteOp | DstMem | SrcImm | No64, group1), G(DstMem | SrcImmByte, group1), - F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), + I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), /* 0x88 - 0x8F */ I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), @@ -4348,12 +4283,12 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov), - F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), + I2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), /* 0xA8 - 0xAF */ - F2bv(DstAcc | SrcImm | NoWrite, em_test), + I2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), + I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 
0xB8 - 0xBF */ @@ -4378,7 +4313,7 @@ static const struct opcode opcode_table[256] = { G(Src2CL | ByteOp, group2), G(Src2CL, group2), I(DstAcc | SrcImmUByte | No64, em_aam), I(DstAcc | SrcImmUByte | No64, em_aad), - F(DstAcc | ByteOp | No64, em_salc), + I(DstAcc | ByteOp | No64, em_salc), I(DstAcc | SrcXLat | ByteOp, em_mov), /* 0xD8 - 0xDF */ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, @@ -4463,32 +4398,32 @@ static const struct opcode twobyte_table[256] = { /* 0xA0 - 0xA7 */ I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), II(ImplicitOps, em_cpuid, cpuid), - F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), - F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, + I(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), + I(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, /* 0xA8 - 0xAF */ I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), II(EmulateOnUD | ImplicitOps, em_rsm, rsm), - F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), - F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), - F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), - GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), + I(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), + I(DstMem | SrcReg | Src2CL | ModRM, em_shrd), + GD(0, &group15), I(DstReg | SrcMem | ModRM, em_imul), /* 0xB0 - 0xB7 */ I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg), I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), - F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), + I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xB8 - 0xBF */ N, N, G(BitOp, group8), - F(DstMem | SrcReg | ModRM | BitOp 
| Lock | PageTable, em_btc), + I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), I(DstReg | SrcMem | ModRM, em_bsf_c), I(DstReg | SrcMem | ModRM, em_bsr_c), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ - F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), + I2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), N, ID(0, &instr_dual_0f_c3), N, N, N, GD(0, &group9), /* 0xC8 - 0xCF */ @@ -5198,24 +5133,6 @@ static void fetch_possible_mmx_operand(struct operand *op) kvm_read_mmx_reg(op->addr.mm, &op->mm_val); } -static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) -{ - ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; - - if (!(ctxt->d & ByteOp)) - fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - - asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" - : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT - : "c"(ctxt->src2.val)); - - ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); - if (!fop) /* exception is returned in fop variable */ - return emulate_de(ctxt); - return X86EMUL_CONTINUE; -} - void init_decode_cache(struct x86_emulate_ctxt *ctxt) { /* Clear fields that are set conditionally but read without a guard. 
*/ @@ -5379,10 +5296,7 @@ special_insn: ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { - if (ctxt->d & Fastop) - rc = fastop(ctxt, ctxt->fop); - else - rc = ctxt->execute(ctxt); + rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) goto done; goto writeback; diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0a6cf5bff2aa..bc255d709d8a 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -361,6 +361,10 @@ SYM_FUNC_END(vmread_error_trampoline) .section .text, "ax" +#ifndef CONFIG_X86_FRED + SYM_FUNC_START(vmx_do_interrupt_irqoff) VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 SYM_FUNC_END(vmx_do_interrupt_irqoff) + +#endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d7b258af63ea..f87c216d976d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7021,8 +7021,14 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu, "unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; + /* + * Invoke the kernel's IRQ handler for the vector. Use the FRED path + * when it's available even if FRED isn't fully enabled, e.g. even if + * FRED isn't supported in hardware, in order to avoid the indirect + * CALL in the non-FRED path. + */ kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); - if (cpu_feature_enabled(X86_FEATURE_FRED)) + if (IS_ENABLED(CONFIG_X86_FRED)) fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector); else vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector)); diff --git a/arch/x86/lib/bhi.S b/arch/x86/lib/bhi.S index 58891681261b..aad1e5839202 100644 --- a/arch/x86/lib/bhi.S +++ b/arch/x86/lib/bhi.S @@ -5,7 +5,7 @@ #include <asm/nospec-branch.h> /* - * Notably, the FineIBT preamble calling these will have ZF set and r10 zero. + * Notably, the FineIBT preamble calling these will have ZF set and eax zero. 
* * The very last element is in fact larger than 32 bytes, but since its the * last element, this does not matter, @@ -36,7 +36,7 @@ SYM_INNER_LABEL(__bhi_args_1, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_1 - cmovne %r10, %rdi + cmovne %rax, %rdi ANNOTATE_UNRET_SAFE ret int3 @@ -53,8 +53,8 @@ SYM_INNER_LABEL(__bhi_args_2, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_1 - cmovne %r10, %rdi - cmovne %r10, %rsi + cmovne %rax, %rdi + cmovne %rax, %rsi ANNOTATE_UNRET_SAFE ret int3 @@ -64,9 +64,9 @@ SYM_INNER_LABEL(__bhi_args_3, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_1 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx ANNOTATE_UNRET_SAFE ret int3 @@ -76,10 +76,10 @@ SYM_INNER_LABEL(__bhi_args_4, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_2 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx - cmovne %r10, %rcx + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx + cmovne %rax, %rcx ANNOTATE_UNRET_SAFE ret int3 @@ -89,11 +89,11 @@ SYM_INNER_LABEL(__bhi_args_5, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_2 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx - cmovne %r10, %rcx - cmovne %r10, %r8 + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx + cmovne %rax, %rcx + cmovne %rax, %r8 ANNOTATE_UNRET_SAFE ret int3 @@ -110,12 +110,12 @@ SYM_INNER_LABEL(__bhi_args_6, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_2 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx - cmovne %r10, %rcx - cmovne %r10, %r8 - cmovne %r10, %r9 + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx + cmovne %rax, %rcx + cmovne %rax, %r8 + cmovne %rax, %r9 ANNOTATE_UNRET_SAFE ret int3 @@ -125,13 +125,13 @@ SYM_INNER_LABEL(__bhi_args_7, SYM_L_LOCAL) ANNOTATE_NOENDBR UNWIND_HINT_FUNC jne .Lud_2 - cmovne %r10, %rdi - cmovne %r10, %rsi - cmovne %r10, %rdx - cmovne %r10, %rcx - cmovne %r10, %r8 - cmovne %r10, %r9 - 
cmovne %r10, %rsp + cmovne %rax, %rdi + cmovne %rax, %rsi + cmovne %rax, %rdx + cmovne %rax, %rcx + cmovne %rax, %r8 + cmovne %rax, %r9 + cmovne %rax, %rsp ANNOTATE_UNRET_SAFE ret int3 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index f513d33b6d37..8f1fed0c3b83 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -134,10 +134,10 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) .macro ITS_THUNK reg /* - * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b) + * If CFI paranoid is used then the ITS thunk starts with opcodes (1: udb; jne 1b) * that complete the fineibt_paranoid caller sequence. */ -1: .byte 0xea +1: ASM_UDB SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fc13306af15f..d4c93d9e73e4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -420,12 +420,12 @@ static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity) u8 *prog = *pprog; EMIT_ENDBR(); - EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */ + EMIT1_off32(0x2d, hash); /* subl $hash, %eax */ if (cfi_bhi) { + EMIT2(0x2e, 0x2e); /* cs cs */ emit_call(&prog, __bhi_args[arity], ip + 11); } else { - EMIT2(0x75, 0xf9); /* jne.d8 .-7 */ - EMIT3(0x0f, 0x1f, 0x00); /* nop3 */ + EMIT3_off32(0x2e, 0x0f, 0x85, 3); /* jne.d32,pn 3 */ } EMIT_ENDBR_POISON(); diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 2206b8bc47b8..f0a5fba0717e 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -11,6 +11,10 @@ #include <asm/nospec-branch.h> SYM_FUNC_START(__efi_call) + /* + * The EFI code doesn't have any CFI, annotate away the CFI violation. 
+ */ + ANNOTATE_NOCFI_SYM pushq %rbp movq %rsp, %rbp and $~0xf, %rsp diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 6c24426104ba..e1f5e9abb301 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -9,6 +9,7 @@ #include <linux/vmalloc.h> #include <linux/mman.h> #include <linux/uaccess.h> +#include <linux/objtool.h> #include <asm/cacheflush.h> #include <asm/sections.h> @@ -86,6 +87,10 @@ static noinline __nocfi void execute_location(void *dst, bool write) func(); pr_err("FAIL: func returned\n"); } +/* + * Explicitly doing the wrong thing for testing. + */ +ANNOTATE_NOCFI_SYM(execute_location); static void execute_user_location(void *dst) { diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 4a8dc8d0a4b7..2933c41c77c8 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -406,6 +406,16 @@ config RTC_DRV_MAX77686 This driver can also be built as a module. If so, the module will be called rtc-max77686. +config RTC_DRV_SPACEMIT_P1 + tristate "SpacemiT P1 RTC" + depends on ARCH_SPACEMIT || COMPILE_TEST + select MFD_SPACEMIT_P1 + default ARCH_SPACEMIT + help + Enable support for the RTC function in the SpacemiT P1 PMIC. + This driver can also be built as a module, which will be called + "spacemit-p1-rtc". + config RTC_DRV_NCT3018Y tristate "Nuvoton NCT3018Y" depends on OF @@ -2044,20 +2054,6 @@ config RTC_DRV_RENESAS_RTCA3 This driver can also be built as a module, if so, the module will be called "rtc-rtca3". -comment "HID Sensor RTC drivers" - -config RTC_DRV_HID_SENSOR_TIME - tristate "HID Sensor Time" - depends on USB_HID - depends on HID_SENSOR_HUB && IIO - select HID_SENSOR_IIO_COMMON - help - Say yes here to build support for the HID Sensors of type Time. - This drivers makes such sensors available as RTCs. - - If this driver is compiled as a module, it will be named - rtc-hid-sensor-time. 
- config RTC_DRV_GOLDFISH tristate "Goldfish Real Time Clock" depends on HAS_IOMEM @@ -2132,4 +2128,18 @@ config RTC_DRV_S32G This RTC module can be used as a wakeup source. Please note that it is not battery-powered. +comment "HID Sensor RTC drivers" + +config RTC_DRV_HID_SENSOR_TIME + tristate "HID Sensor Time" + depends on USB_HID + depends on HID_SENSOR_HUB && IIO + select HID_SENSOR_IIO_COMMON + help + Say yes here to build support for the HID Sensors of type Time. + This driver makes such sensors available as RTCs. + + If this driver is compiled as a module, it will be named + rtc-hid-sensor-time. + endif # RTC_CLASS diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 610a9ee5fd33..8221bda6e6dc 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -172,6 +172,7 @@ obj-$(CONFIG_RTC_DRV_SD2405AL) += rtc-sd2405al.o obj-$(CONFIG_RTC_DRV_SD3078) += rtc-sd3078.o obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o obj-$(CONFIG_RTC_DRV_SNVS) += rtc-snvs.o +obj-$(CONFIG_RTC_DRV_SPACEMIT_P1) += rtc-spacemit-p1.o obj-$(CONFIG_RTC_DRV_SPEAR) += rtc-spear.o obj-$(CONFIG_RTC_DRV_STARFIRE) += rtc-starfire.o obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index dc741ba29fa3..b8b298efd9a9 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -443,6 +443,29 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) else err = rtc->ops->set_alarm(rtc->dev.parent, alarm); + /* + * Check for potential race described above. If we are waiting for the next + * second, and the second just ticked since the check above, either + * + * 1) It ticked after the alarm was set, and an alarm irq should be + * generated. + * + * 2) It ticked before the alarm was set, and alarm irq most likely will + * not be generated. 
+ * + * While we cannot easily check for which of these two scenarios we + * are in, we can return -ETIME to signal that the timer has already + * expired, which is true in both cases. + */ + if ((scheduled - now) <= 1) { + err = __rtc_read_time(rtc, &tm); + if (err) + return err; + now = rtc_tm_to_time64(&tm); + if (scheduled <= now) + return -ETIME; + } + trace_rtc_set_alarm(rtc_tm_to_time64(&alarm->time), err); return err; } @@ -594,6 +617,10 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled) rtc->uie_rtctimer.node.expires = ktime_add(now, onesec); rtc->uie_rtctimer.period = ktime_set(1, 0); err = rtc_timer_enqueue(rtc, &rtc->uie_rtctimer); + if (!err && rtc->ops && rtc->ops->alarm_irq_enable) + err = rtc->ops->alarm_irq_enable(rtc->dev.parent, 1); + if (err) + goto out; } else { rtc_timer_remove(rtc, &rtc->uie_rtctimer); } diff --git a/drivers/rtc/rtc-amlogic-a4.c b/drivers/rtc/rtc-amlogic-a4.c index 09d78c2cc691..1928b29c1045 100644 --- a/drivers/rtc/rtc-amlogic-a4.c +++ b/drivers/rtc/rtc-amlogic-a4.c @@ -72,13 +72,6 @@ struct aml_rtc_data { const struct aml_rtc_config *config; }; -static const struct regmap_config aml_rtc_regmap_config = { - .reg_bits = 32, - .val_bits = 32, - .reg_stride = 4, - .max_register = RTC_REAL_TIME, -}; - static inline u32 gray_to_binary(u32 gray) { u32 bcd = gray; @@ -328,6 +321,13 @@ static int aml_rtc_probe(struct platform_device *pdev) void __iomem *base; int ret = 0; + const struct regmap_config aml_rtc_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = RTC_REAL_TIME, + }; + rtc = devm_kzalloc(dev, sizeof(*rtc), GFP_KERNEL); if (!rtc) return -ENOMEM; diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index c170345ac076..8b6b35716f53 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -268,6 +268,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev) return err; rtc->alarm_irq = platform_get_irq(pdev, 0); + rtc->alarm_enabled = 
true; err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL, cpcap_rtc_alarm_irq, IRQF_TRIGGER_NONE | IRQF_ONESHOT, diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index fa8bf82df948..b4f44999ef0f 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -112,48 +112,6 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime) return true; } -static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) -{ - efi_time_t eft; - efi_status_t status; - - /* - * As of EFI v1.10, this call always returns an unsupported status - */ - status = efi.get_wakeup_time((efi_bool_t *)&wkalrm->enabled, - (efi_bool_t *)&wkalrm->pending, &eft); - - if (status != EFI_SUCCESS) - return -EINVAL; - - if (!convert_from_efi_time(&eft, &wkalrm->time)) - return -EIO; - - return rtc_valid_tm(&wkalrm->time); -} - -static int efi_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) -{ - efi_time_t eft; - efi_status_t status; - - convert_to_efi_time(&wkalrm->time, &eft); - - /* - * XXX Fixme: - * As of EFI 0.92 with the firmware I have on my - * machine this call does not seem to work quite - * right - * - * As of v1.10, this call always returns an unsupported status - */ - status = efi.set_wakeup_time((efi_bool_t)wkalrm->enabled, &eft); - - dev_warn(dev, "write status is %d\n", (int)status); - - return status == EFI_SUCCESS ? 
0 : -EINVAL; -} - static int efi_read_time(struct device *dev, struct rtc_time *tm) { efi_status_t status; @@ -188,17 +146,13 @@ static int efi_set_time(struct device *dev, struct rtc_time *tm) static int efi_procfs(struct device *dev, struct seq_file *seq) { - efi_time_t eft, alm; + efi_time_t eft; efi_time_cap_t cap; - efi_bool_t enabled, pending; - struct rtc_device *rtc = dev_get_drvdata(dev); memset(&eft, 0, sizeof(eft)); - memset(&alm, 0, sizeof(alm)); memset(&cap, 0, sizeof(cap)); efi.get_time(&eft, &cap); - efi.get_wakeup_time(&enabled, &pending, &alm); seq_printf(seq, "Time\t\t: %u:%u:%u.%09u\n" @@ -214,26 +168,6 @@ static int efi_procfs(struct device *dev, struct seq_file *seq) /* XXX fixme: convert to string? */ seq_printf(seq, "Timezone\t: %u\n", eft.timezone); - if (test_bit(RTC_FEATURE_ALARM, rtc->features)) { - seq_printf(seq, - "Alarm Time\t: %u:%u:%u.%09u\n" - "Alarm Date\t: %u-%u-%u\n" - "Alarm Daylight\t: %u\n" - "Enabled\t\t: %s\n" - "Pending\t\t: %s\n", - alm.hour, alm.minute, alm.second, alm.nanosecond, - alm.year, alm.month, alm.day, - alm.daylight, - enabled == 1 ? "yes" : "no", - pending == 1 ? "yes" : "no"); - - if (alm.timezone == EFI_UNSPECIFIED_TIMEZONE) - seq_puts(seq, "Timezone\t: unspecified\n"); - else - /* XXX fixme: convert to string? 
*/ - seq_printf(seq, "Timezone\t: %u\n", alm.timezone); - } - /* * now prints the capabilities */ @@ -249,8 +183,6 @@ static int efi_procfs(struct device *dev, struct seq_file *seq) static const struct rtc_class_ops efi_rtc_ops = { .read_time = efi_read_time, .set_time = efi_set_time, - .read_alarm = efi_read_alarm, - .set_alarm = efi_set_alarm, .proc = efi_procfs, }; @@ -271,11 +203,7 @@ static int __init efi_rtc_probe(struct platform_device *dev) platform_set_drvdata(dev, rtc); rtc->ops = &efi_rtc_ops; - clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); - if (efi_rt_services_supported(EFI_RT_SUPPORTED_WAKEUP_SERVICES)) - set_bit(RTC_FEATURE_ALARM_WAKEUP_ONLY, rtc->features); - else - clear_bit(RTC_FEATURE_ALARM, rtc->features); + clear_bit(RTC_FEATURE_ALARM, rtc->features); device_init_wakeup(&dev->dev, true); diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c index 9b44839a7402..5fc52dc64213 100644 --- a/drivers/rtc/rtc-isl12022.c +++ b/drivers/rtc/rtc-isl12022.c @@ -413,6 +413,7 @@ static int isl12022_setup_irq(struct device *dev, int irq) if (ret) return ret; + isl12022->irq_enabled = true; ret = devm_request_threaded_irq(dev, irq, NULL, isl12022_rtc_interrupt, IRQF_SHARED | IRQF_ONESHOT, diff --git a/drivers/rtc/rtc-meson.c b/drivers/rtc/rtc-meson.c index 47e9ebf58ffc..21eceb9e2e13 100644 --- a/drivers/rtc/rtc-meson.c +++ b/drivers/rtc/rtc-meson.c @@ -72,7 +72,6 @@ static const struct regmap_config meson_rtc_peripheral_regmap_config = { .val_bits = 32, .reg_stride = 4, .max_register = RTC_REG4, - .fast_io = true, }; /* RTC front-end serialiser controls */ diff --git a/drivers/rtc/rtc-optee.c b/drivers/rtc/rtc-optee.c index 9f8b5d4a8f6b..184c6d142801 100644 --- a/drivers/rtc/rtc-optee.c +++ b/drivers/rtc/rtc-optee.c @@ -5,19 +5,104 @@ #include <linux/device.h> #include <linux/kernel.h> +#include <linux/kthread.h> #include <linux/module.h> #include <linux/rtc.h> #include <linux/tee_drv.h> -#define RTC_INFO_VERSION 0x1 +#define 
RTC_INFO_VERSION 0x1 -#define TA_CMD_RTC_GET_INFO 0x0 -#define TA_CMD_RTC_GET_TIME 0x1 -#define TA_CMD_RTC_SET_TIME 0x2 -#define TA_CMD_RTC_GET_OFFSET 0x3 -#define TA_CMD_RTC_SET_OFFSET 0x4 +#define TA_RTC_FEATURE_CORRECTION BIT(0) +#define TA_RTC_FEATURE_ALARM BIT(1) +#define TA_RTC_FEATURE_WAKEUP_ALARM BIT(2) -#define TA_RTC_FEATURE_CORRECTION BIT(0) +enum rtc_optee_pta_cmd { + /* PTA_CMD_RTC_GET_INFO - Get RTC information + * + * [out] memref[0] RTC buffer memory reference containing a struct pta_rtc_info + */ + PTA_CMD_RTC_GET_INF = 0x0, + + /* + * PTA_CMD_RTC_GET_TIME - Get time from RTC + * + * [out] memref[0] RTC buffer memory reference containing a struct pta_rtc_time + */ + PTA_CMD_RTC_GET_TIME = 0x1, + + /* + * PTA_CMD_RTC_SET_TIME - Set time from RTC + * + * [in] memref[0] RTC buffer memory reference containing a struct pta_rtc_time to be + * used as RTC time + */ + PTA_CMD_RTC_SET_TIME = 0x2, + + /* + * PTA_CMD_RTC_GET_OFFSET - Get RTC offset + * + * [out] value[0].a RTC offset (signed 32bit value) + */ + PTA_CMD_RTC_GET_OFFSET = 0x3, + + /* + * PTA_CMD_RTC_SET_OFFSET - Set RTC offset + * + * [in] value[0].a RTC offset to be set (signed 32bit value) + */ + PTA_CMD_RTC_SET_OFFSET = 0x4, + + /* + * PTA_CMD_RTC_READ_ALARM - Read RTC alarm + * + * [out] memref[0] RTC buffer memory reference containing a struct pta_rtc_alarm + */ + PTA_CMD_RTC_READ_ALARM = 0x5, + + /* + * PTA_CMD_RTC_SET_ALARM - Set RTC alarm + * + * [in] memref[0] RTC buffer memory reference containing a struct pta_rtc_alarm to be + * used as RTC alarm + */ + PTA_CMD_RTC_SET_ALARM = 0x6, + + /* + * PTA_CMD_RTC_ENABLE_ALARM - Enable Alarm + * + * [in] value[0].a RTC IRQ flag (uint32_t), 0 to disable the alarm, 1 to enable + */ + PTA_CMD_RTC_ENABLE_ALARM = 0x7, + + /* + * PTA_CMD_RTC_WAIT_ALARM - Get alarm event + * + * [out] value[0].a RTC wait alarm return status (uint32_t): + * - 0: No alarm event + * - 1: Alarm event occurred + * - 2: Alarm event canceled + */ + PTA_CMD_RTC_WAIT_ALARM = 
0x8, + + /* + * PTA_CMD_RTC_CANCEL_WAIT - Cancel wait for alarm event + */ + PTA_CMD_RTC_CANCEL_WAIT = 0x9, + + /* + * PTA_CMD_RTC_SET_WAKE_ALARM_STATUS - Set RTC wake alarm status flag + * + * [in] value[0].a RTC IRQ wake alarm flag (uint32_t), 0 to disable the wake up + * capability, 1 to enable. + */ + PTA_CMD_RTC_SET_WAKE_ALARM_STATUS = 0xA, +}; + +enum rtc_wait_alarm_status { + WAIT_ALARM_RESET = 0x0, + WAIT_ALARM_ALARM_OCCURRED = 0x1, + WAIT_ALARM_CANCELED = 0x2, +}; struct optee_rtc_time { u32 tm_sec; @@ -29,6 +114,12 @@ struct optee_rtc_time { u32 tm_wday; }; +struct optee_rtc_alarm { + u8 enabled; + u8 pending; + struct optee_rtc_time time; +}; + struct optee_rtc_info { u64 version; u64 features; @@ -41,15 +132,21 @@ struct optee_rtc_info { * @dev: OP-TEE based RTC device. * @ctx: OP-TEE context handler. * @session_id: RTC TA session identifier. + * @session2_id: RTC wait alarm session identifier. * @shm: Memory pool shared with RTC device. * @features: Bitfield of RTC features + * @alarm_task: RTC wait alarm task. + * @rtc: RTC device. 
*/ struct optee_rtc { struct device *dev; struct tee_context *ctx; u32 session_id; + u32 session2_id; struct tee_shm *shm; u64 features; + struct task_struct *alarm_task; + struct rtc_device *rtc; }; static int optee_rtc_readtime(struct device *dev, struct rtc_time *tm) @@ -60,7 +157,7 @@ static int optee_rtc_readtime(struct device *dev, struct rtc_time *tm) struct tee_param param[4] = {0}; int ret; - inv_arg.func = TA_CMD_RTC_GET_TIME; + inv_arg.func = PTA_CMD_RTC_GET_TIME; inv_arg.session = priv->session_id; inv_arg.num_params = 4; @@ -97,19 +194,10 @@ static int optee_rtc_settime(struct device *dev, struct rtc_time *tm) struct optee_rtc *priv = dev_get_drvdata(dev); struct tee_ioctl_invoke_arg inv_arg = {0}; struct tee_param param[4] = {0}; - struct optee_rtc_time optee_tm; - void *rtc_data; + struct optee_rtc_time *optee_tm; int ret; - optee_tm.tm_sec = tm->tm_sec; - optee_tm.tm_min = tm->tm_min; - optee_tm.tm_hour = tm->tm_hour; - optee_tm.tm_mday = tm->tm_mday; - optee_tm.tm_mon = tm->tm_mon; - optee_tm.tm_year = tm->tm_year + 1900; - optee_tm.tm_wday = tm->tm_wday; - - inv_arg.func = TA_CMD_RTC_SET_TIME; + inv_arg.func = PTA_CMD_RTC_SET_TIME; inv_arg.session = priv->session_id; inv_arg.num_params = 4; @@ -117,11 +205,17 @@ static int optee_rtc_settime(struct device *dev, struct rtc_time *tm) param[0].u.memref.shm = priv->shm; param[0].u.memref.size = sizeof(struct optee_rtc_time); - rtc_data = tee_shm_get_va(priv->shm, 0); - if (IS_ERR(rtc_data)) - return PTR_ERR(rtc_data); + optee_tm = tee_shm_get_va(priv->shm, 0); + if (IS_ERR(optee_tm)) + return PTR_ERR(optee_tm); - memcpy(rtc_data, &optee_tm, sizeof(struct optee_rtc_time)); + optee_tm->tm_min = tm->tm_min; + optee_tm->tm_sec = tm->tm_sec; + optee_tm->tm_hour = tm->tm_hour; + optee_tm->tm_mday = tm->tm_mday; + optee_tm->tm_mon = tm->tm_mon; + optee_tm->tm_year = tm->tm_year + 1900; + optee_tm->tm_wday = tm->tm_wday; ret = tee_client_invoke_func(priv->ctx, &inv_arg, param); if (ret < 0 || inv_arg.ret != 0) 
@@ -140,7 +234,7 @@ static int optee_rtc_readoffset(struct device *dev, long *offset) if (!(priv->features & TA_RTC_FEATURE_CORRECTION)) return -EOPNOTSUPP; - inv_arg.func = TA_CMD_RTC_GET_OFFSET; + inv_arg.func = PTA_CMD_RTC_GET_OFFSET; inv_arg.session = priv->session_id; inv_arg.num_params = 4; @@ -165,7 +259,7 @@ static int optee_rtc_setoffset(struct device *dev, long offset) if (!(priv->features & TA_RTC_FEATURE_CORRECTION)) return -EOPNOTSUPP; - inv_arg.func = TA_CMD_RTC_SET_OFFSET; + inv_arg.func = PTA_CMD_RTC_SET_OFFSET; inv_arg.session = priv->session_id; inv_arg.num_params = 4; @@ -179,13 +273,228 @@ static int optee_rtc_setoffset(struct device *dev, long offset) return 0; } +static int optee_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct optee_rtc *priv = dev_get_drvdata(dev); + struct tee_ioctl_invoke_arg inv_arg = {0}; + struct optee_rtc_alarm *optee_alarm; + struct tee_param param[1] = {0}; + int ret; + + if (!(priv->features & TA_RTC_FEATURE_ALARM)) + return -EOPNOTSUPP; + + inv_arg.func = PTA_CMD_RTC_READ_ALARM; + inv_arg.session = priv->session_id; + inv_arg.num_params = 1; + + /* Fill invoke cmd params */ + param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT; + param[0].u.memref.shm = priv->shm; + param[0].u.memref.size = sizeof(struct optee_rtc_alarm); + + ret = tee_client_invoke_func(priv->ctx, &inv_arg, param); + if (ret < 0 || inv_arg.ret != 0) + return ret ? 
ret : -EPROTO; + + optee_alarm = tee_shm_get_va(priv->shm, 0); + if (IS_ERR(optee_alarm)) + return PTR_ERR(optee_alarm); + + if (param[0].u.memref.size != sizeof(*optee_alarm)) + return -EPROTO; + + alarm->enabled = optee_alarm->enabled; + alarm->pending = optee_alarm->pending; + alarm->time.tm_sec = optee_alarm->time.tm_sec; + alarm->time.tm_min = optee_alarm->time.tm_min; + alarm->time.tm_hour = optee_alarm->time.tm_hour; + alarm->time.tm_mday = optee_alarm->time.tm_mday; + alarm->time.tm_mon = optee_alarm->time.tm_mon; + alarm->time.tm_year = optee_alarm->time.tm_year - 1900; + alarm->time.tm_wday = optee_alarm->time.tm_wday; + alarm->time.tm_yday = rtc_year_days(alarm->time.tm_mday, + alarm->time.tm_mon, + alarm->time.tm_year); + + return 0; +} + +static int optee_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct optee_rtc *priv = dev_get_drvdata(dev); + struct tee_ioctl_invoke_arg inv_arg = {0}; + struct optee_rtc_alarm *optee_alarm; + struct tee_param param[1] = {0}; + int ret; + + if (!(priv->features & TA_RTC_FEATURE_ALARM)) + return -EOPNOTSUPP; + + inv_arg.func = PTA_CMD_RTC_SET_ALARM; + inv_arg.session = priv->session_id; + inv_arg.num_params = 1; + + param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; + param[0].u.memref.shm = priv->shm; + param[0].u.memref.size = sizeof(struct optee_rtc_alarm); + + optee_alarm = tee_shm_get_va(priv->shm, 0); + if (IS_ERR(optee_alarm)) + return PTR_ERR(optee_alarm); + + optee_alarm->enabled = alarm->enabled; + optee_alarm->pending = alarm->pending; + optee_alarm->time.tm_sec = alarm->time.tm_sec; + optee_alarm->time.tm_min = alarm->time.tm_min; + optee_alarm->time.tm_hour = alarm->time.tm_hour; + optee_alarm->time.tm_mday = alarm->time.tm_mday; + optee_alarm->time.tm_mon = alarm->time.tm_mon; + optee_alarm->time.tm_year = alarm->time.tm_year + 1900; + optee_alarm->time.tm_wday = alarm->time.tm_wday; + + ret = tee_client_invoke_func(priv->ctx, &inv_arg, param); + if (ret < 0 || inv_arg.ret != 0) 
+ return ret ? ret : -EPROTO; + + return 0; +} + +static int optee_rtc_enable_alarm(struct device *dev, unsigned int enabled) +{ + struct optee_rtc *priv = dev_get_drvdata(dev); + struct tee_ioctl_invoke_arg inv_arg = {0}; + struct tee_param param[1] = {0}; + int ret; + + if (!(priv->features & TA_RTC_FEATURE_ALARM)) + return -EOPNOTSUPP; + + inv_arg.func = PTA_CMD_RTC_ENABLE_ALARM; + inv_arg.session = priv->session_id; + inv_arg.num_params = 1; + + param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT; + param[0].u.value.a = (bool)enabled; + + ret = tee_client_invoke_func(priv->ctx, &inv_arg, param); + if (ret < 0 || inv_arg.ret != 0) + return ret ? ret : -EPROTO; + + return 0; +} + static const struct rtc_class_ops optee_rtc_ops = { - .read_time = optee_rtc_readtime, - .set_time = optee_rtc_settime, - .set_offset = optee_rtc_setoffset, - .read_offset = optee_rtc_readoffset, + .read_time = optee_rtc_readtime, + .set_time = optee_rtc_settime, + .set_offset = optee_rtc_setoffset, + .read_offset = optee_rtc_readoffset, + .read_alarm = optee_rtc_read_alarm, + .set_alarm = optee_rtc_set_alarm, + .alarm_irq_enable = optee_rtc_enable_alarm, }; +static int optee_rtc_wait_alarm(struct device *dev, int *return_status) +{ + struct optee_rtc *priv = dev_get_drvdata(dev); + struct tee_ioctl_invoke_arg inv_arg = {0}; + struct tee_param param[1] = {0}; + int ret; + + if (!(priv->features & TA_RTC_FEATURE_ALARM)) + return -EOPNOTSUPP; + + inv_arg.func = PTA_CMD_RTC_WAIT_ALARM; + inv_arg.session = priv->session2_id; + inv_arg.num_params = 1; + + param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT; + + ret = tee_client_invoke_func(priv->ctx, &inv_arg, param); + if (ret < 0 || inv_arg.ret != 0) + return ret ? 
ret : -EPROTO; + + *return_status = param[0].u.value.a; + + return 0; +} + +static int optee_rtc_cancel_wait_alarm(struct device *dev) +{ + struct optee_rtc *priv = dev_get_drvdata(dev); + struct tee_ioctl_invoke_arg inv_arg = {0}; + struct tee_param param[1] = {0}; + int ret; + + if (!(priv->features & TA_RTC_FEATURE_ALARM)) + return -EOPNOTSUPP; + + inv_arg.func = PTA_CMD_RTC_CANCEL_WAIT; + inv_arg.session = priv->session_id; + inv_arg.num_params = 0; + + ret = tee_client_invoke_func(priv->ctx, &inv_arg, param); + if (ret < 0 || inv_arg.ret != 0) + return ret ? ret : -EPROTO; + + return 0; +} + +static int optee_rtc_set_alarm_wake_status(struct device *dev, bool status) +{ + struct optee_rtc *priv = dev_get_drvdata(dev); + struct tee_ioctl_invoke_arg inv_arg = {0}; + struct tee_param param[1] = {0}; + int ret; + + if (!(priv->features & TA_RTC_FEATURE_ALARM)) + return -EOPNOTSUPP; + + inv_arg.func = PTA_CMD_RTC_SET_WAKE_ALARM_STATUS; + inv_arg.session = priv->session_id; + inv_arg.num_params = 1; + + param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT; + param[0].u.value.a = status; + + ret = tee_client_invoke_func(priv->ctx, &inv_arg, param); + + if (ret < 0 || inv_arg.ret != 0) + return ret ? 
ret : -EPROTO; + + return 0; +} + +static int optee_rtc_handle_alarm_event(void *data) +{ + struct optee_rtc *priv = (struct optee_rtc *)data; + int wait_alarm_return_status = 0; + int ret; + + while (!kthread_should_stop()) { + ret = optee_rtc_wait_alarm(priv->dev, &wait_alarm_return_status); + if (ret) { + dev_err(priv->dev, "Failed to wait for alarm: %d\n", ret); + return ret; + } + switch (wait_alarm_return_status) { + case WAIT_ALARM_ALARM_OCCURRED: + dev_dbg(priv->dev, "Alarm occurred\n"); + rtc_update_irq(priv->rtc, 1, RTC_IRQF | RTC_AF); + break; + case WAIT_ALARM_CANCELED: + dev_dbg(priv->dev, "Alarm canceled\n"); + break; + default: + dev_warn(priv->dev, "Unknown return status: %d\n", + wait_alarm_return_status); + break; + } + } + + return 0; +} + static int optee_rtc_read_info(struct device *dev, struct rtc_device *rtc, u64 *features) { @@ -196,7 +505,7 @@ static int optee_rtc_read_info(struct device *dev, struct rtc_device *rtc, struct optee_rtc_time *tm; int ret; - inv_arg.func = TA_CMD_RTC_GET_INFO; + inv_arg.func = PTA_CMD_RTC_GET_INF; inv_arg.session = priv->session_id; inv_arg.num_params = 4; @@ -241,14 +550,13 @@ static int optee_ctx_match(struct tee_ioctl_version_data *ver, const void *data) static int optee_rtc_probe(struct device *dev) { struct tee_client_device *rtc_device = to_tee_client_device(dev); - struct tee_ioctl_open_session_arg sess_arg; + struct tee_ioctl_open_session_arg sess2_arg = {0}; + struct tee_ioctl_open_session_arg sess_arg = {0}; struct optee_rtc *priv; struct rtc_device *rtc; struct tee_shm *shm; int ret, err; - memset(&sess_arg, 0, sizeof(sess_arg)); - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -257,12 +565,14 @@ static int optee_rtc_probe(struct device *dev) if (IS_ERR(rtc)) return PTR_ERR(rtc); + priv->rtc = rtc; + /* Open context with TEE driver */ priv->ctx = tee_client_open_context(NULL, optee_ctx_match, NULL, NULL); if (IS_ERR(priv->ctx)) return -ENODEV; - /* Open session 
with rtc Trusted App */ + /* Open first session with rtc Pseudo Trusted App */ export_uuid(sess_arg.uuid, &rtc_device->id.uuid); sess_arg.clnt_login = TEE_IOCTL_LOGIN_REE_KERNEL; @@ -274,6 +584,11 @@ static int optee_rtc_probe(struct device *dev) } priv->session_id = sess_arg.session; + /* + * Shared memory is used for passing an instance of either struct optee_rtc_info, + * struct optee_rtc_time or struct optee_rtc_alarm to OP-TEE service. + * The former is by definition large enough to cover both parameter cases. + */ shm = tee_shm_alloc_kernel_buf(priv->ctx, sizeof(struct optee_rtc_info)); if (IS_ERR(shm)) { dev_err(priv->dev, "tee_shm_alloc_kernel_buf failed\n"); @@ -293,19 +608,70 @@ static int optee_rtc_probe(struct device *dev) goto out_shm; } + /* Handle feature's related setup before registering to rtc framework */ + if (priv->features & TA_RTC_FEATURE_ALARM) { + priv->alarm_task = kthread_create(optee_rtc_handle_alarm_event, + priv, "rtc_alarm_evt"); + if (IS_ERR(priv->alarm_task)) { + dev_err(dev, "Failed to create alarm thread\n"); + err = PTR_ERR(priv->alarm_task); + goto out_shm; + } + + /* + * In case of supported alarm feature on optee side, we create a kthread + * that will, in a new optee session, call a PTA interface "rtc_wait_alarm". + * This call return in case of alarm and in case of canceled alarm. + * The new optee session is therefore only needed in this case as we cannot + * use the same session for parallel calls to optee PTA. + * Hence one session is reserved to wait for alarms and the other to make + * standard calls to RTC PTA. 
+ */ + + /* Open second session with rtc Trusted App */ + export_uuid(sess2_arg.uuid, &rtc_device->id.uuid); + sess2_arg.clnt_login = TEE_IOCTL_LOGIN_REE_KERNEL; + + ret = tee_client_open_session(priv->ctx, &sess2_arg, NULL); + if (ret < 0 || sess2_arg.ret != 0) { + dev_err(dev, "tee_client_open_session failed, err: %x\n", sess2_arg.ret); + err = -EINVAL; + goto out_thrd; + } + priv->session2_id = sess2_arg.session; + + if (priv->features & TA_RTC_FEATURE_WAKEUP_ALARM) + device_init_wakeup(dev, true); + } + err = devm_rtc_register_device(rtc); if (err) - goto out_shm; + goto out_wk; /* - * We must clear this bit after registering because rtc_register_device - * will set it if it sees that .set_offset is provided. + * We must clear those bits after registering because registering a rtc_device + * will set them if it sees that .set_offset and .set_alarm are provided. */ if (!(priv->features & TA_RTC_FEATURE_CORRECTION)) clear_bit(RTC_FEATURE_CORRECTION, rtc->features); + if (!(priv->features & TA_RTC_FEATURE_ALARM)) + clear_bit(RTC_FEATURE_ALARM, rtc->features); - return 0; + /* Start the thread after the rtc is setup */ + if (priv->alarm_task) { + wake_up_process(priv->alarm_task); + dev_dbg(dev, "Wait alarm thread successfully started\n"); + } + return 0; +out_wk: + if (priv->features & TA_RTC_FEATURE_ALARM) { + device_init_wakeup(dev, false); + tee_client_close_session(priv->ctx, priv->session2_id); + } +out_thrd: + if (priv->features & TA_RTC_FEATURE_ALARM) + kthread_stop(priv->alarm_task); out_shm: tee_shm_free(priv->shm); out_sess: @@ -320,12 +686,34 @@ static int optee_rtc_remove(struct device *dev) { struct optee_rtc *priv = dev_get_drvdata(dev); + if (priv->features & TA_RTC_FEATURE_ALARM) { + optee_rtc_cancel_wait_alarm(dev); + kthread_stop(priv->alarm_task); + device_init_wakeup(dev, false); + tee_client_close_session(priv->ctx, priv->session2_id); + } + + tee_shm_free(priv->shm); tee_client_close_session(priv->ctx, priv->session_id); 
tee_client_close_context(priv->ctx); return 0; } +static int optee_rtc_suspend(struct device *dev) +{ + int res = optee_rtc_set_alarm_wake_status(dev, device_may_wakeup(dev)); + + if (res) { + dev_err(dev, "Unable to transmit wakeup information to optee rtc\n"); + return res; + } + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(optee_rtc_pm_ops, optee_rtc_suspend, NULL); + static const struct tee_client_device_id optee_rtc_id_table[] = { {UUID_INIT(0xf389f8c8, 0x845f, 0x496c, 0x8b, 0xbe, 0xd6, 0x4b, 0xd2, 0x4c, 0x92, 0xfd)}, @@ -341,6 +729,7 @@ static struct tee_client_driver optee_rtc_driver = { .bus = &tee_bus_type, .probe = optee_rtc_probe, .remove = optee_rtc_remove, + .pm = pm_sleep_ptr(&optee_rtc_pm_ops), }, }; diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 2e1ac0c42e93..bb4fe81d3d62 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -42,6 +42,7 @@ #define PCF2127_BIT_CTRL2_AF BIT(4) #define PCF2127_BIT_CTRL2_TSF2 BIT(5) #define PCF2127_BIT_CTRL2_WDTF BIT(6) +#define PCF2127_BIT_CTRL2_MSF BIT(7) /* Control register 3 */ #define PCF2127_REG_CTRL3 0x02 #define PCF2127_BIT_CTRL3_BLIE BIT(0) @@ -96,7 +97,8 @@ #define PCF2127_CTRL2_IRQ_MASK ( \ PCF2127_BIT_CTRL2_AF | \ PCF2127_BIT_CTRL2_WDTF | \ - PCF2127_BIT_CTRL2_TSF2) + PCF2127_BIT_CTRL2_TSF2 | \ + PCF2127_BIT_CTRL2_MSF) #define PCF2127_MAX_TS_SUPPORTED 4 @@ -606,6 +608,21 @@ static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127) set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status); } + /* + * When using interrupt pin (INT A) as watchdog output, only allow + * watchdog interrupt (PCF2131_BIT_INT_WD_CD) and disable (mask) all + * other interrupts. 
+ */ + if (pcf2127->cfg->type == PCF2131) { + ret = regmap_write(pcf2127->regmap, + PCF2131_REG_INT_A_MASK1, + PCF2131_BIT_INT_BLIE | + PCF2131_BIT_INT_BIE | + PCF2131_BIT_INT_AIE | + PCF2131_BIT_INT_SI | + PCF2131_BIT_INT_MI); + } + return devm_watchdog_register_device(dev, &pcf2127->wdd); } diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 79b2a16f15ad..291c0ccb0acd 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -331,7 +331,7 @@ static const struct rtc_class_ops s3c_rtcops = { .alarm_irq_enable = s3c_rtc_setaie, }; -static void s3c24xx_rtc_enable(struct s3c_rtc *info) +static void s3c6410_rtc_enable(struct s3c_rtc *info) { unsigned int con, tmp; @@ -361,19 +361,6 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info) } } -static void s3c24xx_rtc_disable(struct s3c_rtc *info) -{ - unsigned int con; - - con = readw(info->base + S3C2410_RTCCON); - con &= ~S3C2410_RTCCON_RTCEN; - writew(con, info->base + S3C2410_RTCCON); - - con = readb(info->base + S3C2410_TICNT); - con &= ~S3C2410_TICNT_ENABLE; - writeb(con, info->base + S3C2410_TICNT); -} - static void s3c6410_rtc_disable(struct s3c_rtc *info) { unsigned int con; @@ -538,53 +525,21 @@ static int s3c_rtc_resume(struct device *dev) #endif static SIMPLE_DEV_PM_OPS(s3c_rtc_pm_ops, s3c_rtc_suspend, s3c_rtc_resume); -static void s3c24xx_rtc_irq(struct s3c_rtc *info, int mask) -{ - rtc_update_irq(info->rtc, 1, RTC_AF | RTC_IRQF); -} - static void s3c6410_rtc_irq(struct s3c_rtc *info, int mask) { rtc_update_irq(info->rtc, 1, RTC_AF | RTC_IRQF); writeb(mask, info->base + S3C2410_INTP); } -static const struct s3c_rtc_data s3c2410_rtc_data = { - .irq_handler = s3c24xx_rtc_irq, - .enable = s3c24xx_rtc_enable, - .disable = s3c24xx_rtc_disable, -}; - -static const struct s3c_rtc_data s3c2416_rtc_data = { - .irq_handler = s3c24xx_rtc_irq, - .enable = s3c24xx_rtc_enable, - .disable = s3c24xx_rtc_disable, -}; - -static const struct s3c_rtc_data s3c2443_rtc_data = { - .irq_handler = 
s3c24xx_rtc_irq, - .enable = s3c24xx_rtc_enable, - .disable = s3c24xx_rtc_disable, -}; - static const struct s3c_rtc_data s3c6410_rtc_data = { .needs_src_clk = true, .irq_handler = s3c6410_rtc_irq, - .enable = s3c24xx_rtc_enable, + .enable = s3c6410_rtc_enable, .disable = s3c6410_rtc_disable, }; static const __maybe_unused struct of_device_id s3c_rtc_dt_match[] = { { - .compatible = "samsung,s3c2410-rtc", - .data = &s3c2410_rtc_data, - }, { - .compatible = "samsung,s3c2416-rtc", - .data = &s3c2416_rtc_data, - }, { - .compatible = "samsung,s3c2443-rtc", - .data = &s3c2443_rtc_data, - }, { .compatible = "samsung,s3c6410-rtc", .data = &s3c6410_rtc_data, }, { diff --git a/drivers/rtc/rtc-s3c.h b/drivers/rtc/rtc-s3c.h index 3552914aa611..11d7a1255ce4 100644 --- a/drivers/rtc/rtc-s3c.h +++ b/drivers/rtc/rtc-s3c.h @@ -21,25 +21,6 @@ #define S3C2443_RTCCON_TICSEL (1 << 4) #define S3C64XX_RTCCON_TICEN (1 << 8) -#define S3C2410_TICNT S3C2410_RTCREG(0x44) -#define S3C2410_TICNT_ENABLE (1 << 7) - -/* S3C2443: tick count is 15 bit wide - * TICNT[6:0] contains upper 7 bits - * TICNT1[7:0] contains lower 8 bits - */ -#define S3C2443_TICNT_PART(x) ((x & 0x7f00) >> 8) -#define S3C2443_TICNT1 S3C2410_RTCREG(0x4C) -#define S3C2443_TICNT1_PART(x) (x & 0xff) - -/* S3C2416: tick count is 32 bit wide - * TICNT[6:0] contains bits [14:8] - * TICNT1[7:0] contains lower 8 bits - * TICNT2[16:0] contains upper 17 bits - */ -#define S3C2416_TICNT2 S3C2410_RTCREG(0x48) -#define S3C2416_TICNT2_PART(x) ((x & 0xffff8000) >> 15) - #define S3C2410_RTCALM S3C2410_RTCREG(0x50) #define S3C2410_RTCALM_ALMEN (1 << 6) #define S3C2410_RTCALM_YEAREN (1 << 5) diff --git a/drivers/rtc/rtc-sd2405al.c b/drivers/rtc/rtc-sd2405al.c index 00c3033e8079..708ea5d964de 100644 --- a/drivers/rtc/rtc-sd2405al.c +++ b/drivers/rtc/rtc-sd2405al.c @@ -5,7 +5,9 @@ * Datasheet: * https://image.dfrobot.com/image/data/TOY0021/SD2405AL%20datasheet%20(Angelo%20v0.1).pdf * - * Copyright (C) 2024 Tóth János <gomba007@gmail.com> + * 
I2C slave address: 0x32 + * + * Copyright (C) 2024-2025 Tóth János <gomba007@gmail.com> */ #include <linux/bcd.h> diff --git a/drivers/rtc/rtc-spacemit-p1.c b/drivers/rtc/rtc-spacemit-p1.c new file mode 100644 index 000000000000..43ab62494bb4 --- /dev/null +++ b/drivers/rtc/rtc-spacemit-p1.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for the RTC found in the SpacemiT P1 PMIC + * + * Copyright (C) 2025 by RISCstar Solutions Corporation. All rights reserved. + */ + +#include <linux/bits.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/rtc.h> + +#define MOD_NAME "spacemit-p1-rtc" + +/* + * Six consecutive 1-byte registers hold the seconds, minutes, hours, + * day-of-month, month, and year (respectively). + * + * The range of values in these registers is: + * seconds 0-59 + * minutes 0-59 + * hours 0-59 + * day 0-30 (struct tm is 1-31) + * month 0-11 + * year years since 2000 (struct tm is since 1900) + * + * Note that the day and month must be converted after reading and + * before writing. + */ +#define RTC_TIME 0x0d /* Offset of the seconds register */ + +#define RTC_CTRL 0x1d +#define RTC_EN BIT(2) + +/* Number of attempts to read a consistent time stamp before giving up */ +#define RTC_READ_TRIES 20 /* At least 1 */ + +struct p1_rtc { + struct regmap *regmap; + struct rtc_device *rtc; +}; + +/* + * The P1 hardware documentation states that the register values are + * latched to ensure a consistent time snapshot within the registers, + * but these are in fact unstable due to a bug in the hardware design. + * So we loop until we get two identical readings. 
+ */ +static int p1_rtc_read_time(struct device *dev, struct rtc_time *t) +{ + struct p1_rtc *p1 = dev_get_drvdata(dev); + struct regmap *regmap = p1->regmap; + u32 count = RTC_READ_TRIES; + u8 seconds; + u8 time[6]; + int ret; + + if (!regmap_test_bits(regmap, RTC_CTRL, RTC_EN)) + return -EINVAL; /* RTC is disabled */ + + ret = regmap_bulk_read(regmap, RTC_TIME, time, sizeof(time)); + if (ret) + return ret; + + do { + seconds = time[0]; + ret = regmap_bulk_read(regmap, RTC_TIME, time, sizeof(time)); + if (ret) + return ret; + } while (time[0] != seconds && --count); + + if (!count) + return -EIO; /* Unable to get a consistent result */ + + t->tm_sec = time[0] & GENMASK(5, 0); + t->tm_min = time[1] & GENMASK(5, 0); + t->tm_hour = time[2] & GENMASK(4, 0); + t->tm_mday = (time[3] & GENMASK(4, 0)) + 1; + t->tm_mon = time[4] & GENMASK(3, 0); + t->tm_year = (time[5] & GENMASK(5, 0)) + 100; + + return 0; +} + +/* + * The P1 hardware documentation states that values in the registers are + * latched so when written they represent a consistent time snapshot. + * Nevertheless, this is not guaranteed by the implementation, so we must + * disable the RTC while updating it. 
+ */ +static int p1_rtc_set_time(struct device *dev, struct rtc_time *t) +{ + struct p1_rtc *p1 = dev_get_drvdata(dev); + struct regmap *regmap = p1->regmap; + u8 time[6]; + int ret; + + time[0] = t->tm_sec; + time[1] = t->tm_min; + time[2] = t->tm_hour; + time[3] = t->tm_mday - 1; + time[4] = t->tm_mon; + time[5] = t->tm_year - 100; + + /* Disable the RTC to update; re-enable again when done */ + ret = regmap_clear_bits(regmap, RTC_CTRL, RTC_EN); + if (ret) + return ret; + + /* If something goes wrong, leave the RTC disabled */ + ret = regmap_bulk_write(regmap, RTC_TIME, time, sizeof(time)); + if (ret) + return ret; + + return regmap_set_bits(regmap, RTC_CTRL, RTC_EN); +} + +static const struct rtc_class_ops p1_rtc_class_ops = { + .read_time = p1_rtc_read_time, + .set_time = p1_rtc_set_time, +}; + +static int p1_rtc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct rtc_device *rtc; + struct p1_rtc *p1; + + p1 = devm_kzalloc(dev, sizeof(*p1), GFP_KERNEL); + if (!p1) + return -ENOMEM; + dev_set_drvdata(dev, p1); + + p1->regmap = dev_get_regmap(dev->parent, NULL); + if (!p1->regmap) + return dev_err_probe(dev, -ENODEV, "failed to get regmap\n"); + + rtc = devm_rtc_allocate_device(dev); + if (IS_ERR(rtc)) + return dev_err_probe(dev, PTR_ERR(rtc), + "error allocating device\n"); + p1->rtc = rtc; + + rtc->ops = &p1_rtc_class_ops; + rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + rtc->range_max = RTC_TIMESTAMP_END_2063; + + clear_bit(RTC_FEATURE_ALARM, rtc->features); + clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features); + + return devm_rtc_register_device(rtc); +} + +static struct platform_driver p1_rtc_driver = { + .probe = p1_rtc_probe, + .driver = { + .name = MOD_NAME, + }, +}; + +module_platform_driver(p1_rtc_driver); + +MODULE_DESCRIPTION("SpacemiT P1 RTC driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" MOD_NAME); diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index 54c8429b16bf..76ecf7b798f0 100644 
--- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -258,6 +258,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) irq_set_status_flags(rtc->irq, IRQ_NOAUTOEN); + rtc->irq_en = true; ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, tps6586x_rtc_irq, IRQF_ONESHOT, diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index 4bcd7ca32f27..b8a0fccef14e 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -669,7 +669,7 @@ static const struct i2c_device_id x1205_id[] = { MODULE_DEVICE_TABLE(i2c, x1205_id); static const struct of_device_id x1205_dt_ids[] = { - { .compatible = "xircom,x1205", }, + { .compatible = "xicor,x1205", }, {}, }; MODULE_DEVICE_TABLE(of, x1205_dt_ids); diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c index f39102b66eac..3baa2b481d9f 100644 --- a/drivers/rtc/rtc-zynqmp.c +++ b/drivers/rtc/rtc-zynqmp.c @@ -277,6 +277,10 @@ static irqreturn_t xlnx_rtc_interrupt(int irq, void *id) static int xlnx_rtc_probe(struct platform_device *pdev) { struct xlnx_rtc_dev *xrtcdev; + bool is_alarm_set = false; + u32 pending_alrm_irq; + u32 current_time; + u32 alarm_time; int ret; xrtcdev = devm_kzalloc(&pdev->dev, sizeof(*xrtcdev), GFP_KERNEL); @@ -296,6 +300,17 @@ static int xlnx_rtc_probe(struct platform_device *pdev) if (IS_ERR(xrtcdev->reg_base)) return PTR_ERR(xrtcdev->reg_base); + /* Clear any pending alarm interrupts from previous kernel/boot */ + pending_alrm_irq = readl(xrtcdev->reg_base + RTC_INT_STS) & RTC_INT_ALRM; + if (pending_alrm_irq) + writel(pending_alrm_irq, xrtcdev->reg_base + RTC_INT_STS); + + /* Check if a valid alarm is already set from previous kernel/boot */ + alarm_time = readl(xrtcdev->reg_base + RTC_ALRM); + current_time = readl(xrtcdev->reg_base + RTC_CUR_TM); + if (alarm_time > current_time && alarm_time != 0) + is_alarm_set = true; + xrtcdev->alarm_irq = platform_get_irq_byname(pdev, "alarm"); if (xrtcdev->alarm_irq < 0) return xrtcdev->alarm_irq; @@ 
-337,6 +352,10 @@ static int xlnx_rtc_probe(struct platform_device *pdev) xlnx_init_rtc(xrtcdev); + /* Re-enable alarm interrupt if a valid alarm was found */ + if (is_alarm_set) + writel(RTC_INT_ALRM, xrtcdev->reg_base + RTC_INT_EN); + device_init_wakeup(&pdev->dev, true); return devm_rtc_register_device(xrtcdev->rtc); diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 2c72da6b8cf0..7f1ad305eee6 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -124,7 +124,7 @@ static void mvs_free(struct mvs_info *mvi) if (mvi->shost) scsi_host_put(mvi->shost); list_for_each_entry(mwq, &mvi->wq_list, entry) - cancel_delayed_work(&mwq->work_q); + cancel_delayed_work_sync(&mwq->work_q); kfree(mvi->rsvd_tags); kfree(mvi); } diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 604e66bead1e..cb95b7b12051 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4890,7 +4890,9 @@ struct purex_item { struct purex_item *pkt); atomic_t in_use; uint16_t size; - uint8_t iocb[] __counted_by(size); + struct { + uint8_t iocb[64]; + } iocb; }; #include "qla_edif.h" @@ -5099,6 +5101,7 @@ typedef struct scsi_qla_host { struct list_head head; spinlock_t lock; } purex_list; + struct purex_item default_item; struct name_list_extended gnl; /* Count of active session/fcport */ @@ -5127,11 +5130,6 @@ typedef struct scsi_qla_host { #define DPORT_DIAG_IN_PROGRESS BIT_0 #define DPORT_DIAG_CHIP_RESET_IN_PROGRESS BIT_1 uint16_t dport_status; - - /* Must be last --ends in a flexible-array member. 
*/ - TRAILING_OVERLAP(struct purex_item, default_item, iocb, - uint8_t __default_item_iocb[QLA_DEFAULT_PAYLOAD_SIZE]; - ); } scsi_qla_host_t; struct qla27xx_image_status { diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 4559b490614d..c4c6b5c6658c 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1077,17 +1077,17 @@ static struct purex_item * qla24xx_alloc_purex_item(scsi_qla_host_t *vha, uint16_t size) { struct purex_item *item = NULL; + uint8_t item_hdr_size = sizeof(*item); if (size > QLA_DEFAULT_PAYLOAD_SIZE) { - item = kzalloc(struct_size(item, iocb, size), GFP_ATOMIC); + item = kzalloc(item_hdr_size + + (size - QLA_DEFAULT_PAYLOAD_SIZE), GFP_ATOMIC); } else { if (atomic_inc_return(&vha->default_item.in_use) == 1) { item = &vha->default_item; goto initialize_purex_header; } else { - item = kzalloc( - struct_size(item, iocb, QLA_DEFAULT_PAYLOAD_SIZE), - GFP_ATOMIC); + item = kzalloc(item_hdr_size, GFP_ATOMIC); } } if (!item) { @@ -1127,16 +1127,17 @@ qla24xx_queue_purex_item(scsi_qla_host_t *vha, struct purex_item *pkt, * @vha: SCSI driver HA context * @pkt: ELS packet */ -static struct purex_item * -qla24xx_copy_std_pkt(struct scsi_qla_host *vha, void *pkt) +static struct purex_item +*qla24xx_copy_std_pkt(struct scsi_qla_host *vha, void *pkt) { struct purex_item *item; - item = qla24xx_alloc_purex_item(vha, QLA_DEFAULT_PAYLOAD_SIZE); + item = qla24xx_alloc_purex_item(vha, + QLA_DEFAULT_PAYLOAD_SIZE); if (!item) return item; - memcpy(&item->iocb, pkt, QLA_DEFAULT_PAYLOAD_SIZE); + memcpy(&item->iocb, pkt, sizeof(item->iocb)); return item; } diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 065f9bcca26f..316594aa40cc 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -1308,7 +1308,7 @@ void qla2xxx_process_purls_iocb(void **pkt, struct rsp_que **rsp) ql_dbg(ql_dbg_unsol, vha, 0x2121, "PURLS OP[%01x] size %d xchg addr 0x%x 
portid %06x\n", - item->iocb[3], item->size, uctx->exchange_address, + item->iocb.iocb[3], item->size, uctx->exchange_address, fcport->d_id.b24); /* +48 0 1 2 3 4 5 6 7 8 9 A B C D E F * ----- ----------------------------------------------- diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index cb56d2af6cfa..5ffd94586652 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6459,10 +6459,9 @@ dealloc: void qla24xx_free_purex_item(struct purex_item *item) { - if (item == &item->vha->default_item) { + if (item == &item->vha->default_item) memset(&item->vha->default_item, 0, sizeof(struct purex_item)); - memset(&item->vha->__default_item_iocb, 0, QLA_DEFAULT_PAYLOAD_SIZE); - } else + else kfree(item); } diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 0904ecae253a..b19acd662726 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -2774,7 +2774,7 @@ static ssize_t target_lu_gp_members_show(struct config_item *item, char *page) config_item_name(&dev->dev_group.cg_item)); cur_len++; /* Extra byte for NULL terminator */ - if ((cur_len + len) > PAGE_SIZE) { + if ((cur_len + len) > PAGE_SIZE || cur_len > LU_GROUP_NAME_BUF) { pr_warn("Ran out of lu_gp_show_attr" "_members buffer\n"); break; diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 0086816b27cd..c040afc6668e 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1949,7 +1949,7 @@ static umode_t ufs_sysfs_hid_is_visible(struct kobject *kobj, return hba->dev_info.hid_sup ? 
attr->mode : 0; } -static const struct attribute_group ufs_sysfs_hid_group = { +const struct attribute_group ufs_sysfs_hid_group = { .name = "hid", .attrs = ufs_sysfs_hid, .is_visible = ufs_sysfs_hid_is_visible, diff --git a/drivers/ufs/core/ufs-sysfs.h b/drivers/ufs/core/ufs-sysfs.h index 8d94af3b8077..6efb82a082fd 100644 --- a/drivers/ufs/core/ufs-sysfs.h +++ b/drivers/ufs/core/ufs-sysfs.h @@ -14,5 +14,6 @@ void ufs_sysfs_remove_nodes(struct device *dev); extern const struct attribute_group ufs_sysfs_unit_descriptor_group; extern const struct attribute_group ufs_sysfs_lun_attributes_group; +extern const struct attribute_group ufs_sysfs_hid_group; #endif diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d9632d7c5f01..8339fec975b9 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6684,6 +6684,14 @@ static void ufshcd_err_handler(struct work_struct *work) } spin_unlock_irqrestore(hba->host->host_lock, flags); + ufshcd_rpm_get_noresume(hba); + if (hba->pm_op_in_progress) { + ufshcd_link_recovery(hba); + ufshcd_rpm_put(hba); + return; + } + ufshcd_rpm_put(hba); + ufshcd_err_handling_prepare(hba); spin_lock_irqsave(hba->host->host_lock, flags); @@ -8489,6 +8497,8 @@ static int ufs_get_device_desc(struct ufs_hba *hba) DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP) & UFS_DEV_HID_SUPPORT; + sysfs_update_group(&hba->dev->kobj, &ufs_sysfs_hid_group); + model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; err = ufshcd_read_string_desc(hba, model_index, @@ -10677,6 +10687,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) */ spin_lock_init(&hba->clk_gating.lock); + /* Initialize mutex for PM QoS request synchronization */ + mutex_init(&hba->pm_qos_mutex); + /* * Set the default power management level for runtime and system PM. 
* Host controller drivers can override them in their @@ -10765,9 +10778,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) mutex_init(&hba->wb_mutex); - /* Initialize mutex for PM QoS request synchronization */ - mutex_init(&hba->pm_qos_mutex); - init_rwsem(&hba->clk_scaling_lock); ufshcd_init_clk_gating(hba); diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index c14c9a035ee0..a4f5321eafae 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -27,7 +27,7 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode, a = le32_to_cpu(btree->u.internal[i].down); brelse(bh); if (!(anode = hpfs_map_anode(s, a, &bh))) return -1; - btree = &anode->btree; + btree = GET_BTREE_PTR(&anode->btree); goto go_down; } hpfs_error(s, "sector %08x not found in internal anode %08x", sec, a); @@ -69,12 +69,13 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi int n; unsigned fs; int c1, c2 = 0; + if (fnod) { if (!(fnode = hpfs_map_fnode(s, node, &bh))) return -1; - btree = &fnode->btree; + btree = GET_BTREE_PTR(&fnode->btree); } else { if (!(anode = hpfs_map_anode(s, node, &bh))) return -1; - btree = &anode->btree; + btree = GET_BTREE_PTR(&anode->btree); } a = node; go_down: @@ -91,7 +92,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_add_sector_to_btree #1")) return -1; if (!(anode = hpfs_map_anode(s, a, &bh))) return -1; - btree = &anode->btree; + btree = GET_BTREE_PTR(&anode->btree); goto go_down; } if (n >= 0) { @@ -151,7 +152,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } brelse(bh); bh = bh1; - btree = &anode->btree; + btree = GET_BTREE_PTR(&anode->btree); } btree->n_free_nodes--; n = btree->n_used_nodes++; le16_add_cpu(&btree->first_free, 12); @@ -168,10 +169,10 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi if 
(hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1; if (up != node || !fnod) { if (!(anode = hpfs_map_anode(s, up, &bh))) return -1; - btree = &anode->btree; + btree = GET_BTREE_PTR(&anode->btree); } else { if (!(fnode = hpfs_map_fnode(s, up, &bh))) return -1; - btree = &fnode->btree; + btree = GET_BTREE_PTR(&fnode->btree); } if (btree->n_free_nodes) { btree->n_free_nodes--; n = btree->n_used_nodes++; @@ -206,8 +207,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi anode->btree.n_used_nodes = 1; anode->btree.n_free_nodes = 59; anode->btree.first_free = cpu_to_le16(16); - anode->btree.u.internal[0].down = cpu_to_le32(a); - anode->btree.u.internal[0].file_secno = cpu_to_le32(-1); + GET_BTREE_PTR(&anode->btree)->u.internal[0].down = cpu_to_le32(a); + GET_BTREE_PTR(&anode->btree)->u.internal[0].file_secno = cpu_to_le32(-1); mark_buffer_dirty(bh); brelse(bh); if ((anode = hpfs_map_anode(s, a, &bh))) { @@ -229,20 +230,20 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi brelse(bh2); return -1; } - btree = &anode->btree; + btree = GET_BTREE_PTR(&anode->btree); } else { if (!(fnode = hpfs_map_fnode(s, node, &bh))) { brelse(bh2); return -1; } - btree = &fnode->btree; + btree = GET_BTREE_PTR(&fnode->btree); } ranode->up = cpu_to_le32(node); memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free)); if (fnod) ranode->btree.flags |= BP_fnode_parent; - ranode->btree.n_free_nodes = (bp_internal(&ranode->btree) ? 60 : 40) - ranode->btree.n_used_nodes; - if (bp_internal(&ranode->btree)) for (n = 0; n < ranode->btree.n_used_nodes; n++) { + GET_BTREE_PTR(&ranode->btree)->n_free_nodes = (bp_internal(GET_BTREE_PTR(&ranode->btree)) ? 
60 : 40) - GET_BTREE_PTR(&ranode->btree)->n_used_nodes; + if (bp_internal(GET_BTREE_PTR(&ranode->btree))) for (n = 0; n < GET_BTREE_PTR(&ranode->btree)->n_used_nodes; n++) { struct anode *unode; if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) { unode->up = cpu_to_le32(ra); @@ -291,7 +292,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1")) return; if (!(anode = hpfs_map_anode(s, ano, &bh))) return; - btree1 = &anode->btree; + btree1 = GET_BTREE_PTR(&anode->btree); level++; pos = 0; } @@ -307,7 +308,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) ano = le32_to_cpu(anode->up); if (--level) { if (!(anode = hpfs_map_anode(s, ano, &bh))) return; - btree1 = &anode->btree; + btree1 = GET_BTREE_PTR(&anode->btree); } else btree1 = btree; for (i = 0; i < btree1->n_used_nodes; i++) { if (le32_to_cpu(btree1->u.internal[i].down) == oano) { @@ -332,7 +333,7 @@ static secno anode_lookup(struct super_block *s, anode_secno a, unsigned sec) struct anode *anode; struct buffer_head *bh; if (!(anode = hpfs_map_anode(s, a, &bh))) return -1; - return hpfs_bplus_lookup(s, NULL, &anode->btree, sec, bh); + return hpfs_bplus_lookup(s, NULL, GET_BTREE_PTR(&anode->btree), sec, bh); } int hpfs_ea_read(struct super_block *s, secno a, int ano, unsigned pos, @@ -388,7 +389,7 @@ void hpfs_ea_remove(struct super_block *s, secno a, int ano, unsigned len) struct buffer_head *bh; if (ano) { if (!(anode = hpfs_map_anode(s, a, &bh))) return; - hpfs_remove_btree(s, &anode->btree); + hpfs_remove_btree(s, GET_BTREE_PTR(&anode->btree)); brelse(bh); hpfs_free_sectors(s, a, 1); } else hpfs_free_sectors(s, a, (len + 511) >> 9); @@ -407,10 +408,10 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) int c1, c2 = 0; if (fno) { if (!(fnode = hpfs_map_fnode(s, f, &bh))) return; - btree = &fnode->btree; + btree = 
GET_BTREE_PTR(&fnode->btree); } else { if (!(anode = hpfs_map_anode(s, f, &bh))) return; - btree = &anode->btree; + btree = GET_BTREE_PTR(&anode->btree); } if (!secs) { hpfs_remove_btree(s, btree); @@ -448,7 +449,7 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree")) return; if (!(anode = hpfs_map_anode(s, node, &bh))) return; - btree = &anode->btree; + btree = GET_BTREE_PTR(&anode->btree); } nodes = btree->n_used_nodes + btree->n_free_nodes; for (i = 0; i < btree->n_used_nodes; i++) @@ -485,7 +486,7 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno) struct extended_attribute *ea; struct extended_attribute *ea_end; if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; - if (!fnode_is_dir(fnode)) hpfs_remove_btree(s, &fnode->btree); + if (!fnode_is_dir(fnode)) hpfs_remove_btree(s, GET_BTREE_PTR(&fnode->btree)); else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index 102ba18e561f..2149d3ca530b 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -41,7 +41,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len) struct buffer_head *bh; struct anode *anode; if ((anode = hpfs_map_anode(s, a, &bh))) { - hpfs_remove_btree(s, &anode->btree); + hpfs_remove_btree(s, GET_BTREE_PTR(&anode->btree)); brelse(bh); hpfs_free_sectors(s, a, 1); } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 263b5bbe1849..29e876705369 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -51,7 +51,9 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno, unsigned *n_sec return hpfs_inode->i_disk_sec + n; } if (!(fnode = hpfs_map_fnode(inode->i_sb, inode->i_ino, &bh))) return 0; - disk_secno = hpfs_bplus_lookup(inode->i_sb, inode, &fnode->btree, file_secno, bh); + disk_secno = hpfs_bplus_lookup(inode->i_sb, 
inode, + GET_BTREE_PTR(&fnode->btree), + file_secno, bh); if (disk_secno == -1) return 0; if (hpfs_chk_sectors(inode->i_sb, disk_secno, 1, "bmap")) return 0; n = file_secno - hpfs_inode->i_file_sec; diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 281dec8f636b..353f73c914d9 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -394,27 +394,45 @@ enum { BP_binary_search = 0x40, BP_internal = 0x80 }; + +/** + * GET_BTREE_PTR() - Get a pointer to struct bplus_header + * + * Wrapper around container_of() to retrieve a pointer to struct + * bplus_header from a pointer to struct bplus_header_fixed. + * + * @ptr: Pointer to struct bplus_header_fixed. + * + */ +#define GET_BTREE_PTR(ptr) \ + container_of(ptr, struct bplus_header, __hdr) + struct bplus_header { - u8 flags; /* bit 0 - high bit of first free entry offset + /* New members MUST be added within the struct_group() macro below. */ + struct_group_tagged(bplus_header_fixed, __hdr, + u8 flags; /* bit 0 - high bit of first free entry offset bit 5 - we're pointed to by an fnode, the data btree or some ea or the main ea bootage pointer ea_secno bit 6 - suggest binary search (unused) bit 7 - 1 -> (internal) tree of anodes 0 -> (leaf) list of extents */ - u8 fill[3]; - u8 n_free_nodes; /* free nodes in following array */ - u8 n_used_nodes; /* used nodes in following array */ - __le16 first_free; /* offset from start of header to + u8 fill[3]; + u8 n_free_nodes; /* free nodes in following array */ + u8 n_used_nodes; /* used nodes in following array */ + __le16 first_free; /* offset from start of header to first free node in array */ - union { - /* (internal) 2-word entries giving subtree pointers */ - DECLARE_FLEX_ARRAY(struct bplus_internal_node, internal); - /* (external) 3-word entries giving sector runs */ - DECLARE_FLEX_ARRAY(struct bplus_leaf_node, external); - } u; + ); + union { + /* (internal) 2-word entries giving subtree pointers */ + DECLARE_FLEX_ARRAY(struct bplus_internal_node, internal); + /* (external) 
3-word entries giving sector runs */ + DECLARE_FLEX_ARRAY(struct bplus_leaf_node, external); + } u; }; +static_assert(offsetof(struct bplus_header, u.internal) == sizeof(struct bplus_header_fixed), + "struct member likely outside of struct_group_tagged()"); static inline bool bp_internal(struct bplus_header *bp) { @@ -453,7 +471,7 @@ struct fnode __le16 flags; /* bit 1 set -> ea_secno is an anode */ /* bit 8 set -> directory. first & only extent points to dnode. */ - struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ + struct bplus_header_fixed btree; /* b+ tree, 8 extents or 12 subtrees */ union { struct bplus_leaf_node external[8]; struct bplus_internal_node internal[12]; @@ -495,7 +513,7 @@ struct anode __le32 self; /* pointer to this anode */ __le32 up; /* parent anode or fnode */ - struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ + struct bplus_header_fixed btree; /* b+tree, 40 extents or 60 subtrees */ union { struct bplus_leaf_node external[40]; struct bplus_internal_node internal[60]; diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index ecd9fccd1663..be73233502f8 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -178,14 +178,14 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea } if (!fnode_is_dir(fnode)) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != - (bp_internal(&fnode->btree) ? 12 : 8)) { + (bp_internal(GET_BTREE_PTR(&fnode->btree)) ? 12 : 8)) { hpfs_error(s, "bad number of nodes in fnode %08lx", (unsigned long)ino); goto bail; } if (le16_to_cpu(fnode->btree.first_free) != - 8 + fnode->btree.n_used_nodes * (bp_internal(&fnode->btree) ? 8 : 12)) { + 8 + fnode->btree.n_used_nodes * (bp_internal(GET_BTREE_PTR(&fnode->btree)) ? 
8 : 12)) { hpfs_error(s, "bad first_free pointer in fnode %08lx", (unsigned long)ino); @@ -233,12 +233,12 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff goto bail; } if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != - (bp_internal(&anode->btree) ? 60 : 40)) { + (bp_internal(GET_BTREE_PTR(&anode->btree)) ? 60 : 40)) { hpfs_error(s, "bad number of nodes in anode %08x", ano); goto bail; } if (le16_to_cpu(anode->btree.first_free) != - 8 + anode->btree.n_used_nodes * (bp_internal(&anode->btree) ? 8 : 12)) { + 8 + anode->btree.n_used_nodes * (bp_internal(GET_BTREE_PTR(&anode->btree)) ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in anode %08x", ano); goto bail; } diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index e3cdc421dfba..353e13a615f5 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -52,8 +52,10 @@ static struct dentry *hpfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, dee.fnode = cpu_to_le32(fno); dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); - if (!result) + if (!result) { + err = -ENOMEM; goto bail2; + } hpfs_init_inode(result); result->i_ino = fno; hpfs_i(result)->i_parent_dir = dir->i_ino; @@ -153,9 +155,10 @@ static int hpfs_create(struct mnt_idmap *idmap, struct inode *dir, dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); - if (!result) + if (!result) { + err = -ENOMEM; goto bail1; - + } hpfs_init_inode(result); result->i_ino = fno; result->i_mode |= S_IFREG; @@ -239,9 +242,10 @@ static int hpfs_mknod(struct mnt_idmap *idmap, struct inode *dir, dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); - if (!result) + if (!result) { + err = -ENOMEM; goto bail1; - + } hpfs_init_inode(result); result->i_ino = fno; hpfs_i(result)->i_parent_dir = 
dir->i_ino; @@ -314,8 +318,10 @@ static int hpfs_symlink(struct mnt_idmap *idmap, struct inode *dir, dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(local_get_seconds(dir->i_sb)); result = new_inode(dir->i_sb); - if (!result) + if (!result) { + err = -ENOMEM; goto bail1; + } result->i_ino = fno; hpfs_init_inode(result); hpfs_i(result)->i_parent_dir = dir->i_ino; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 42b779b4d87f..8ab85e7ac91e 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -404,15 +404,11 @@ static int hpfs_parse_param(struct fs_context *fc, struct fs_parameter *param) break; case Opt_timeshift: { - int m = 1; char *rhs = param->string; int timeshift; - if (*rhs == '-') m = -1; - if (*rhs == '+' || *rhs == '-') rhs++; - timeshift = simple_strtoul(rhs, &rhs, 0) * m; - if (*rhs) - return -EINVAL; + if (kstrtoint(rhs, 0, &timeshift)) + return -EINVAL; ctx->timeshift = timeshift; break; } diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 8720a0705900..107ce05bd16e 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -115,11 +115,6 @@ # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#if __has_feature(kcfi) -/* Disable CFI checking inside a function. */ -#define __nocfi __attribute__((__no_sanitize__("kcfi"))) -#endif - /* * Turn individual warnings and errors on and off locally, depending * on version. 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5d07c469b571..5de824a0b3d7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -35,10 +35,6 @@ (typeof(ptr)) (__ptr + (off)); \ }) -#ifdef CONFIG_MITIGATION_RETPOLINE -#define __noretpoline __attribute__((__indirect_branch__("keep"))) -#endif - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 2f3e80bf9f35..59288a2c1ad2 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -455,7 +455,9 @@ struct ftrace_likely_data { # define __noscs #endif -#ifndef __nocfi +#if defined(CONFIG_CFI) +# define __nocfi __attribute__((__no_sanitize__("kcfi"))) +#else # define __nocfi #endif diff --git a/include/linux/init.h b/include/linux/init.h index a60d32d227ee..17c1bc712e23 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -7,13 +7,6 @@ #include <linux/stringify.h> #include <linux/types.h> -/* Built-in __init functions needn't be compiled with retpoline */ -#if defined(__noretpoline) && !defined(MODULE) -#define __noinitretpoline __noretpoline -#else -#define __noinitretpoline -#endif - /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. 
The kernel can take this @@ -50,7 +43,6 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ #define __init __section(".init.text") __cold __latent_entropy \ - __noinitretpoline \ __no_kstack_erase #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 559d13a3bc44..25042c1d8d54 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -18,6 +18,7 @@ enum kho_event { struct folio; struct notifier_block; +struct page; #define DECLARE_KHOSER_PTR(name, type) \ union { \ @@ -38,13 +39,24 @@ struct notifier_block; struct kho_serialization; +struct kho_vmalloc_chunk; +struct kho_vmalloc { + DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); + unsigned int total_pages; + unsigned short flags; + unsigned short order; +}; + #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); -int kho_preserve_phys(phys_addr_t phys, size_t size); +int kho_preserve_pages(struct page *page, unsigned int nr_pages); +int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation); struct folio *kho_restore_folio(phys_addr_t phys); +struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages); +void *kho_restore_vmalloc(const struct kho_vmalloc *preservation); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); @@ -71,7 +83,13 @@ static inline int kho_preserve_folio(struct folio *folio) return -EOPNOTSUPP; } -static inline int kho_preserve_phys(phys_addr_t phys, size_t size) +static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages) +{ + return -EOPNOTSUPP; +} + +static inline int kho_preserve_vmalloc(void *ptr, + struct kho_vmalloc *preservation) { return -EOPNOTSUPP; } @@ -81,6 +99,17 @@ static inline struct 
folio *kho_restore_folio(phys_addr_t phys) return NULL; } +static inline struct page *kho_restore_pages(phys_addr_t phys, + unsigned int nr_pages) +{ + return NULL; +} + +static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) +{ + return NULL; +} + static inline int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt) { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 16fe0306e50e..873e510d6f8d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1001,22 +1001,28 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, count_memcg_events_mm(mm, idx, 1); } -static inline void memcg_memory_event(struct mem_cgroup *memcg, - enum memcg_memory_event event) +static inline void __memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event, + bool allow_spinning) { bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || event == MEMCG_SWAP_FAIL; + /* For now only MEMCG_MAX can happen with !allow_spinning context. 
*/ + VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX); + atomic_long_inc(&memcg->memory_events_local[event]); - if (!swap_event) + if (!swap_event && allow_spinning) cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - if (swap_event) - cgroup_file_notify(&memcg->swap_events_file); - else - cgroup_file_notify(&memcg->events_file); + if (allow_spinning) { + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); + } if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; @@ -1026,6 +1032,12 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, !mem_cgroup_is_root(memcg)); } +static inline void memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event) +{ + __memcg_memory_event(memcg, event, true); +} + static inline void memcg_memory_event_mm(struct mm_struct *mm, enum memcg_memory_event event) { diff --git a/include/linux/mm.h b/include/linux/mm.h index a3f97c551ad8..d16b33bacc32 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -323,7 +323,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ -#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ +#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 366ad004d794..46ebaa46e6c5 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -184,6 +184,15 @@ * WARN using UD2. */ #define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) +/* + * This should not be used; it annotates away CFI violations. 
There are a few + * valid use cases like kexec handover to the next kernel image, and there is + * no security concern there. + * + * There are also a few real issues annotated away, like EFI because we can't + * control the EFI code. + */ +#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) #else #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR @@ -194,6 +203,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN #define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI #endif #if defined(CONFIG_NOINSTR_VALIDATION) && \ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ae83d8649ef1..6829936d33f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4891,7 +4891,7 @@ union bpf_attr { * * **-ENOENT** if the bpf_local_storage cannot be found. * - * long bpf_d_path(struct path *path, char *buf, u32 sz) + * long bpf_d_path(const struct path *path, char *buf, u32 sz) * Description * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. 
The path is diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 612500a7088f..e64b70132101 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -180,6 +180,7 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define UTP_TASK_REQ_COMPL 0x200 #define UIC_COMMAND_COMPL 0x400 #define DEVICE_FATAL_ERROR 0x800 +#define UTP_ERROR 0x1000 #define CONTROLLER_FATAL_ERROR 0x10000 #define SYSTEM_BUS_FATAL_ERROR 0x20000 #define CRYPTO_ENGINE_FATAL_ERROR 0x40000 @@ -199,7 +200,8 @@ static inline u32 ufshci_version(u32 major, u32 minor) CONTROLLER_FATAL_ERROR |\ SYSTEM_BUS_FATAL_ERROR |\ CRYPTO_ENGINE_FATAL_ERROR |\ - UIC_LINK_LOST) + UIC_LINK_LOST |\ + UTP_ERROR) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT 0x1 diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index f90bdcc0a047..81780bcf8d25 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -775,7 +775,7 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void bpf_free_inode(struct inode *inode) +static void bpf_destroy_inode(struct inode *inode) { enum bpf_type type; @@ -790,7 +790,7 @@ const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = inode_just_drop, .show_options = bpf_show_options, - .free_inode = bpf_free_inode, + .destroy_inode = bpf_destroy_inode, }; enum { diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 5083c68c3a4e..76f0940fb485 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -18,6 +18,7 @@ #include <linux/memblock.h> #include <linux/notifier.h> #include <linux/page-isolation.h> +#include <linux/vmalloc.h> #include <asm/early_ioremap.h> @@ -107,6 +108,29 @@ struct kho_serialization { struct khoser_mem_chunk *preserved_mem_map; }; +struct kho_out { + struct blocking_notifier_head chain_head; + + struct dentry *dir; + + struct mutex lock; /* protects KHO FDT finalization */ + + struct kho_serialization ser; + bool finalized; +}; + +static 
struct kho_out kho_out = { + .chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head), + .lock = __MUTEX_INITIALIZER(kho_out.lock), + .ser = { + .fdt_list = LIST_HEAD_INIT(kho_out.ser.fdt_list), + .track = { + .orders = XARRAY_INIT(kho_out.ser.track.orders, 0), + }, + }, + .finalized = false, +}; + static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz) { void *elm, *res; @@ -165,6 +189,9 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, might_sleep(); + if (kho_out.finalized) + return -EBUSY; + physxa = xa_load(&track->orders, order); if (!physxa) { int err; @@ -248,6 +275,37 @@ struct folio *kho_restore_folio(phys_addr_t phys) } EXPORT_SYMBOL_GPL(kho_restore_folio); +/** + * kho_restore_pages - restore list of contiguous order 0 pages. + * @phys: physical address of the first page. + * @nr_pages: number of pages. + * + * Restore a contiguous list of order 0 pages that was preserved with + * kho_preserve_pages(). + * + * Return: pointer to the first restored page on success, NULL on failure + */ +struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages) +{ + const unsigned long start_pfn = PHYS_PFN(phys); + const unsigned long end_pfn = start_pfn + nr_pages; + unsigned long pfn = start_pfn; + + while (pfn < end_pfn) { + const unsigned int order = + min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); + struct page *page = kho_restore_page(PFN_PHYS(pfn)); + + if (!page) + return NULL; + split_page(page, order); + pfn += 1 << order; + } + + return pfn_to_page(start_pfn); +} +EXPORT_SYMBOL_GPL(kho_restore_pages); + /* Serialize and deserialize struct kho_mem_phys across kexec * * Record all the bitmaps in a linked list of pages for the next kernel to @@ -667,29 +725,6 @@ int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt) } EXPORT_SYMBOL_GPL(kho_add_subtree); -struct kho_out { - struct blocking_notifier_head chain_head; - - struct dentry *dir; - - struct mutex lock; /* protects KHO FDT 
finalization */ - - struct kho_serialization ser; - bool finalized; -}; - -static struct kho_out kho_out = { - .chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head), - .lock = __MUTEX_INITIALIZER(kho_out.lock), - .ser = { - .fdt_list = LIST_HEAD_INIT(kho_out.ser.fdt_list), - .track = { - .orders = XARRAY_INIT(kho_out.ser.track.orders, 0), - }, - }, - .finalized = false, -}; - int register_kho_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&kho_out.chain_head, nb); @@ -717,37 +752,28 @@ int kho_preserve_folio(struct folio *folio) const unsigned int order = folio_order(folio); struct kho_mem_track *track = &kho_out.ser.track; - if (kho_out.finalized) - return -EBUSY; - return __kho_preserve_order(track, pfn, order); } EXPORT_SYMBOL_GPL(kho_preserve_folio); /** - * kho_preserve_phys - preserve a physically contiguous range across kexec. - * @phys: physical address of the range. - * @size: size of the range. + * kho_preserve_pages - preserve contiguous pages across kexec + * @page: first page in the list. + * @nr_pages: number of pages. * - * Instructs KHO to preserve the memory range from @phys to @phys + @size - * across kexec. + * Preserve a contiguous list of order 0 pages. Must be restored using + * kho_restore_pages() to ensure the pages are restored properly as order 0. 
* * Return: 0 on success, error code on failure */ -int kho_preserve_phys(phys_addr_t phys, size_t size) +int kho_preserve_pages(struct page *page, unsigned int nr_pages) { - unsigned long pfn = PHYS_PFN(phys); + struct kho_mem_track *track = &kho_out.ser.track; + const unsigned long start_pfn = page_to_pfn(page); + const unsigned long end_pfn = start_pfn + nr_pages; + unsigned long pfn = start_pfn; unsigned long failed_pfn = 0; - const unsigned long start_pfn = pfn; - const unsigned long end_pfn = PHYS_PFN(phys + size); int err = 0; - struct kho_mem_track *track = &kho_out.ser.track; - - if (kho_out.finalized) - return -EBUSY; - - if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size)) - return -EINVAL; while (pfn < end_pfn) { const unsigned int order = @@ -767,7 +793,256 @@ int kho_preserve_phys(phys_addr_t phys, size_t size) return err; } -EXPORT_SYMBOL_GPL(kho_preserve_phys); +EXPORT_SYMBOL_GPL(kho_preserve_pages); + +struct kho_vmalloc_hdr { + DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *); +}; + +#define KHO_VMALLOC_SIZE \ + ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \ + sizeof(phys_addr_t)) + +struct kho_vmalloc_chunk { + struct kho_vmalloc_hdr hdr; + phys_addr_t phys[KHO_VMALLOC_SIZE]; +}; + +static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE); + +/* vmalloc flags KHO supports */ +#define KHO_VMALLOC_SUPPORTED_FLAGS (VM_ALLOC | VM_ALLOW_HUGE_VMAP) + +/* KHO internal flags for vmalloc preservations */ +#define KHO_VMALLOC_ALLOC 0x0001 +#define KHO_VMALLOC_HUGE_VMAP 0x0002 + +static unsigned short vmalloc_flags_to_kho(unsigned int vm_flags) +{ + unsigned short kho_flags = 0; + + if (vm_flags & VM_ALLOC) + kho_flags |= KHO_VMALLOC_ALLOC; + if (vm_flags & VM_ALLOW_HUGE_VMAP) + kho_flags |= KHO_VMALLOC_HUGE_VMAP; + + return kho_flags; +} + +static unsigned int kho_flags_to_vmalloc(unsigned short kho_flags) +{ + unsigned int vm_flags = 0; + + if (kho_flags & KHO_VMALLOC_ALLOC) + vm_flags |= VM_ALLOC; + if (kho_flags & KHO_VMALLOC_HUGE_VMAP) + vm_flags 
|= VM_ALLOW_HUGE_VMAP; + + return vm_flags; +} + +static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur) +{ + struct kho_vmalloc_chunk *chunk; + int err; + + chunk = (struct kho_vmalloc_chunk *)get_zeroed_page(GFP_KERNEL); + if (!chunk) + return NULL; + + err = kho_preserve_pages(virt_to_page(chunk), 1); + if (err) + goto err_free; + if (cur) + KHOSER_STORE_PTR(cur->hdr.next, chunk); + return chunk; + +err_free: + free_page((unsigned long)chunk); + return NULL; +} + +static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) +{ + struct kho_mem_track *track = &kho_out.ser.track; + unsigned long pfn = PHYS_PFN(virt_to_phys(chunk)); + + __kho_unpreserve(track, pfn, pfn + 1); + + for (int i = 0; chunk->phys[i]; i++) { + pfn = PHYS_PFN(chunk->phys[i]); + __kho_unpreserve(track, pfn, pfn + 1); + } +} + +static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc) +{ + struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(kho_vmalloc->first); + + while (chunk) { + struct kho_vmalloc_chunk *tmp = chunk; + + kho_vmalloc_unpreserve_chunk(chunk); + + chunk = KHOSER_LOAD_PTR(chunk->hdr.next); + free_page((unsigned long)tmp); + } +} + +/** + * kho_preserve_vmalloc - preserve memory allocated with vmalloc() across kexec + * @ptr: pointer to the area in vmalloc address space + * @preservation: placeholder for preservation metadata + * + * Instructs KHO to preserve the area in vmalloc address space at @ptr. The + * physical pages mapped at @ptr will be preserved and on successful return + * @preservation will hold the physical address of a structure that describes + * the preservation. 
+ * + * NOTE: The memory allocated with vmalloc_node() variants cannot be reliably + * restored on the same node + * + * Return: 0 on success, error code on failure + */ +int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation) +{ + struct kho_vmalloc_chunk *chunk; + struct vm_struct *vm = find_vm_area(ptr); + unsigned int order, flags, nr_contig_pages; + unsigned int idx = 0; + int err; + + if (!vm) + return -EINVAL; + + if (vm->flags & ~KHO_VMALLOC_SUPPORTED_FLAGS) + return -EOPNOTSUPP; + + flags = vmalloc_flags_to_kho(vm->flags); + order = get_vm_area_page_order(vm); + + chunk = new_vmalloc_chunk(NULL); + if (!chunk) + return -ENOMEM; + KHOSER_STORE_PTR(preservation->first, chunk); + + nr_contig_pages = (1 << order); + for (int i = 0; i < vm->nr_pages; i += nr_contig_pages) { + phys_addr_t phys = page_to_phys(vm->pages[i]); + + err = kho_preserve_pages(vm->pages[i], nr_contig_pages); + if (err) + goto err_free; + + chunk->phys[idx++] = phys; + if (idx == ARRAY_SIZE(chunk->phys)) { + chunk = new_vmalloc_chunk(chunk); + if (!chunk) + goto err_free; + idx = 0; + } + } + + preservation->total_pages = vm->nr_pages; + preservation->flags = flags; + preservation->order = order; + + return 0; + +err_free: + kho_vmalloc_free_chunks(preservation); + return err; +} +EXPORT_SYMBOL_GPL(kho_preserve_vmalloc); + +/** + * kho_restore_vmalloc - recreates and populates an area in vmalloc address + * space from the preserved memory. + * @preservation: preservation metadata. + * + * Recreates an area in vmalloc address space and populates it with memory that + * was preserved using kho_preserve_vmalloc(). + * + * Return: pointer to the area in the vmalloc address space, NULL on failure. 
+ */ +void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) +{ + struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(preservation->first); + unsigned int align, order, shift, vm_flags; + unsigned long total_pages, contig_pages; + unsigned long addr, size; + struct vm_struct *area; + struct page **pages; + unsigned int idx = 0; + int err; + + vm_flags = kho_flags_to_vmalloc(preservation->flags); + if (vm_flags & ~KHO_VMALLOC_SUPPORTED_FLAGS) + return NULL; + + total_pages = preservation->total_pages; + pages = kvmalloc_array(total_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return NULL; + order = preservation->order; + contig_pages = (1 << order); + shift = PAGE_SHIFT + order; + align = 1 << shift; + + while (chunk) { + struct page *page; + + for (int i = 0; chunk->phys[i]; i++) { + phys_addr_t phys = chunk->phys[i]; + + if (idx + contig_pages > total_pages) + goto err_free_pages_array; + + page = kho_restore_pages(phys, contig_pages); + if (!page) + goto err_free_pages_array; + + for (int j = 0; j < contig_pages; j++) + pages[idx++] = page; + + phys += contig_pages * PAGE_SIZE; + } + + page = kho_restore_pages(virt_to_phys(chunk), 1); + if (!page) + goto err_free_pages_array; + chunk = KHOSER_LOAD_PTR(chunk->hdr.next); + __free_page(page); + } + + if (idx != total_pages) + goto err_free_pages_array; + + area = __get_vm_area_node(total_pages * PAGE_SIZE, align, shift, + vm_flags, VMALLOC_START, VMALLOC_END, + NUMA_NO_NODE, GFP_KERNEL, + __builtin_return_address(0)); + if (!area) + goto err_free_pages_array; + + addr = (unsigned long)area->addr; + size = get_vm_area_size(area); + err = vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, shift); + if (err) + goto err_free_vm_area; + + area->nr_pages = total_pages; + area->pages = pages; + + return area->addr; + +err_free_vm_area: + free_vm_area(area); +err_free_pages_array: + kvfree(pages); + return NULL; +} +EXPORT_SYMBOL_GPL(kho_restore_vmalloc); /* Handling for debug/kho/out */ diff --git 
a/kernel/trace/trace.c b/kernel/trace/trace.c index 98b6a9cb1454..d1e527cf2aae 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7441,7 +7441,8 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, ssize_t written; size_t size; - size = sizeof(*entry) + cnt; + /* cnt includes both the entry->id and the data behind it. */ + size = struct_size(entry, buf, cnt - sizeof(entry->id)); buffer = tr->array_buffer.buffer; @@ -7455,7 +7456,10 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, return -EBADF; entry = ring_buffer_event_data(event); - memcpy(&entry->id, buf, cnt); + unsafe_memcpy(&entry->id, buf, cnt, + "id and content already reserved on ring buffer" + "'buf' includes the 'id' and the data." + "'entry' was allocated with cnt from 'id'."); written = cnt; __buffer_unlock_commit(buffer, event); @@ -7497,12 +7501,12 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, if (tr == &global_trace) { guard(rcu)(); list_for_each_entry_rcu(tr, &marker_copies, marker_list) { - written = write_raw_marker_to_buffer(tr, ubuf, cnt); + written = write_raw_marker_to_buffer(tr, buf, cnt); if (written < 0) break; } } else { - written = write_raw_marker_to_buffer(tr, ubuf, cnt); + written = write_raw_marker_to_buffer(tr, buf, cnt); } return written; diff --git a/lib/test_kho.c b/lib/test_kho.c index fe8504e3407b..60cd899ea745 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -32,6 +32,7 @@ module_param(max_mem, long, 0644); struct kho_test_state { unsigned int nr_folios; struct folio **folios; + phys_addr_t *folios_info; struct folio *fdt; __wsum csum; }; @@ -67,18 +68,15 @@ static struct notifier_block kho_test_nb = { static int kho_test_save_data(struct kho_test_state *state, void *fdt) { - phys_addr_t *folios_info; + phys_addr_t *folios_info __free(kvfree) = NULL; + struct kho_vmalloc folios_info_phys; int err = 0; - err |= fdt_begin_node(fdt, "data"); - err |= fdt_property(fdt, "nr_folios", &state->nr_folios, - 
sizeof(state->nr_folios)); - err |= fdt_property_placeholder(fdt, "folios_info", - state->nr_folios * sizeof(*folios_info), - (void **)&folios_info); - err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); - err |= fdt_end_node(fdt); + folios_info = vmalloc_array(state->nr_folios, sizeof(*folios_info)); + if (!folios_info) + return -ENOMEM; + err = kho_preserve_vmalloc(folios_info, &folios_info_phys); if (err) return err; @@ -93,6 +91,17 @@ static int kho_test_save_data(struct kho_test_state *state, void *fdt) break; } + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property(fdt, "folios_info", &folios_info_phys, + sizeof(folios_info_phys)); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + if (!err) + state->folios_info = no_free_ptr(folios_info); + return err; } @@ -209,8 +218,9 @@ err_free_folios: static int kho_test_restore_data(const void *fdt, int node) { + const struct kho_vmalloc *folios_info_phys; const unsigned int *nr_folios; - const phys_addr_t *folios_info; + phys_addr_t *folios_info; const __wsum *old_csum; __wsum csum = 0; int len; @@ -225,8 +235,12 @@ static int kho_test_restore_data(const void *fdt, int node) if (!old_csum || len != sizeof(*old_csum)) return -EINVAL; - folios_info = fdt_getprop(fdt, node, "folios_info", &len); - if (!folios_info || len != sizeof(*folios_info) * *nr_folios) + folios_info_phys = fdt_getprop(fdt, node, "folios_info", &len); + if (!folios_info_phys || len != sizeof(*folios_info_phys)) + return -EINVAL; + + folios_info = kho_restore_vmalloc(folios_info_phys); + if (!folios_info) return -EINVAL; for (int i = 0; i < *nr_folios; i++) { @@ -246,6 +260,8 @@ static int kho_test_restore_data(const void *fdt, int node) folio_put(folio); } + vfree(folios_info); + if (csum != *old_csum) return -EINVAL; @@ -304,6 +320,7 @@ static void kho_test_cleanup(void) 
folio_put(kho_test_state.folios[i]); kvfree(kho_test_state.folios); + vfree(kho_test_state.folios_info); folio_put(kho_test_state.fdt); } diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 8c048f9b129e..7e834467b2d8 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -328,10 +328,8 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, } pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - if (!pte) { - walk->action = ACTION_AGAIN; + if (!pte) return 0; - } if (!pte_present(ptep_get(pte))) goto out; damon_ptep_mkold(pte, walk->vma, addr); @@ -481,10 +479,8 @@ regular_page: #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - if (!pte) { - walk->action = ACTION_AGAIN; + if (!pte) return 0; - } ptent = ptep_get(pte); if (!pte_present(ptent)) goto out; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5acca24bbabb..1b81680b4225 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -4104,32 +4104,23 @@ static unsigned long deferred_split_count(struct shrinker *shrink, static bool thp_underused(struct folio *folio) { int num_zero_pages = 0, num_filled_pages = 0; - void *kaddr; int i; if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1) return false; for (i = 0; i < folio_nr_pages(folio); i++) { - kaddr = kmap_local_folio(folio, i * PAGE_SIZE); - if (!memchr_inv(kaddr, 0, PAGE_SIZE)) { - num_zero_pages++; - if (num_zero_pages > khugepaged_max_ptes_none) { - kunmap_local(kaddr); + if (pages_identical(folio_page(folio, i), ZERO_PAGE(0))) { + if (++num_zero_pages > khugepaged_max_ptes_none) return true; - } } else { /* * Another path for early exit once the number * of non-zero filled pages exceeds threshold. 
*/ - num_filled_pages++; - if (num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) { - kunmap_local(kaddr); + if (++num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) return false; - } } - kunmap_local(kaddr); } return false; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6cac826cb61f..795ee393eac0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7222,6 +7222,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma, psize); } spin_unlock(ptl); + + cond_resched(); } /* * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare diff --git a/mm/memblock.c b/mm/memblock.c index 120a501a887a..e23e16618e9b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2452,8 +2452,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser) for (i = 0; i < reserved_mem_count; i++) { struct reserve_mem_table *map = &reserved_mem_table[i]; + struct page *page = phys_to_page(map->start); + unsigned int nr_pages = map->size >> PAGE_SHIFT; - err |= kho_preserve_phys(map->start, map->size); + err |= kho_preserve_pages(page, nr_pages); } err |= kho_preserve_folio(page_folio(kho_fdt)); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e090f29eb03b..4deda33625f4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2307,12 +2307,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, bool drained = false; bool raised_max_event = false; unsigned long pflags; + bool allow_spinning = gfpflags_allow_spinning(gfp_mask); retry: if (consume_stock(memcg, nr_pages)) return 0; - if (!gfpflags_allow_spinning(gfp_mask)) + if (!allow_spinning) /* Avoid the refill and flush of the older stock */ batch = nr_pages; @@ -2348,7 +2349,7 @@ retry: if (!gfpflags_allow_blocking(gfp_mask)) goto nomem; - memcg_memory_event(mem_over_limit, MEMCG_MAX); + __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning); raised_max_event = true; psi_memstall_enter(&pflags); @@ -2415,7 +2416,7 @@ force: * a MEMCG_MAX event. 
*/ if (!raised_max_event) - memcg_memory_event(mem_over_limit, MEMCG_MAX); + __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning); /* * The allocation either can't fail or will lead to more memory diff --git a/mm/migrate.c b/mm/migrate.c index aee61a980374..e3065c9edb55 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -296,19 +296,16 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list) } static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, - struct folio *folio, - unsigned long idx) + struct folio *folio, pte_t old_pte, unsigned long idx) { struct page *page = folio_page(folio, idx); - bool contains_data; pte_t newpte; - void *addr; if (PageCompound(page)) return false; VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(pte_present(ptep_get(pvmw->pte)), page); + VM_BUG_ON_PAGE(pte_present(old_pte), page); if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) || mm_forbids_zeropage(pvmw->vma->vm_mm)) @@ -319,15 +316,17 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, * this subpage has been non present. If the subpage is only zero-filled * then map it to the shared zeropage. 
*/ - addr = kmap_local_page(page); - contains_data = memchr_inv(addr, 0, PAGE_SIZE); - kunmap_local(addr); - - if (contains_data) + if (!pages_identical(page, ZERO_PAGE(0))) return false; newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address), pvmw->vma->vm_page_prot)); + + if (pte_swp_soft_dirty(old_pte)) + newpte = pte_mksoft_dirty(newpte); + if (pte_swp_uffd_wp(old_pte)) + newpte = pte_mkuffd_wp(newpte); + set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte); dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio)); @@ -370,13 +369,13 @@ static bool remove_migration_pte(struct folio *folio, continue; } #endif + old_pte = ptep_get(pvmw.pte); if (rmap_walk_arg->map_unused_to_zeropage && - try_to_map_unused_to_zeropage(&pvmw, folio, idx)) + try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx)) continue; folio_get(folio); pte = mk_pte(new, READ_ONCE(vma->vm_page_prot)); - old_pte = ptep_get(pvmw.pte); entry = pte_to_swp_entry(old_pte); if (!is_migration_entry_young(entry)) diff --git a/mm/slub.c b/mm/slub.c index 135c408e0515..b1f15598fbfd 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -504,10 +504,18 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) return s->node[node]; } -/* Get the barn of the current cpu's memory node */ +/* + * Get the barn of the current cpu's closest memory node. 
It may not exist on + * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES + */ static inline struct node_barn *get_barn(struct kmem_cache *s) { - return get_node(s, numa_mem_id())->barn; + struct kmem_cache_node *n = get_node(s, numa_mem_id()); + + if (!n) + return NULL; + + return n->barn; } /* @@ -4982,6 +4990,10 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, } barn = get_barn(s); + if (!barn) { + local_unlock(&s->cpu_sheaves->lock); + return NULL; + } full = barn_replace_empty_sheaf(barn, pcs->main); @@ -5153,13 +5165,20 @@ next_batch: if (unlikely(pcs->main->size == 0)) { struct slab_sheaf *full; + struct node_barn *barn; if (pcs->spare && pcs->spare->size > 0) { swap(pcs->main, pcs->spare); goto do_alloc; } - full = barn_replace_empty_sheaf(get_barn(s), pcs->main); + barn = get_barn(s); + if (!barn) { + local_unlock(&s->cpu_sheaves->lock); + return allocated; + } + + full = barn_replace_empty_sheaf(barn, pcs->main); if (full) { stat(s, BARN_GET); @@ -5314,6 +5333,7 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) { struct slub_percpu_sheaves *pcs; struct slab_sheaf *sheaf = NULL; + struct node_barn *barn; if (unlikely(size > s->sheaf_capacity)) { @@ -5355,8 +5375,11 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) pcs->spare = NULL; stat(s, SHEAF_PREFILL_FAST); } else { + barn = get_barn(s); + stat(s, SHEAF_PREFILL_SLOW); - sheaf = barn_get_full_or_empty_sheaf(get_barn(s)); + if (barn) + sheaf = barn_get_full_or_empty_sheaf(barn); if (sheaf && sheaf->size) stat(s, BARN_GET); else @@ -5426,7 +5449,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, * If the barn has too many full sheaves or we fail to refill the sheaf, * simply flush and free it. 
*/ - if (data_race(barn->nr_full) >= MAX_FULL_SHEAVES || + if (!barn || data_race(barn->nr_full) >= MAX_FULL_SHEAVES || refill_sheaf(s, sheaf, gfp)) { sheaf_flush_unused(s, sheaf); free_empty_sheaf(s, sheaf); @@ -5943,10 +5966,9 @@ slab_empty: * put the full sheaf there. */ static void __pcs_install_empty_sheaf(struct kmem_cache *s, - struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty) + struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty, + struct node_barn *barn) { - struct node_barn *barn; - lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); /* This is what we expect to find if nobody interrupted us. */ @@ -5956,8 +5978,6 @@ static void __pcs_install_empty_sheaf(struct kmem_cache *s, return; } - barn = get_barn(s); - /* * Unlikely because if the main sheaf had space, we would have just * freed to it. Get rid of our empty sheaf. @@ -6002,6 +6022,11 @@ restart: lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); barn = get_barn(s); + if (!barn) { + local_unlock(&s->cpu_sheaves->lock); + return NULL; + } + put_fail = false; if (!pcs->spare) { @@ -6084,7 +6109,7 @@ got_empty: } pcs = this_cpu_ptr(s->cpu_sheaves); - __pcs_install_empty_sheaf(s, pcs, empty); + __pcs_install_empty_sheaf(s, pcs, empty, barn); return pcs; } @@ -6121,8 +6146,9 @@ bool free_to_pcs(struct kmem_cache *s, void *object) static void rcu_free_sheaf(struct rcu_head *head) { + struct kmem_cache_node *n; struct slab_sheaf *sheaf; - struct node_barn *barn; + struct node_barn *barn = NULL; struct kmem_cache *s; sheaf = container_of(head, struct slab_sheaf, rcu_head); @@ -6139,7 +6165,11 @@ static void rcu_free_sheaf(struct rcu_head *head) */ __rcu_free_sheaf_prepare(s, sheaf); - barn = get_node(s, sheaf->node)->barn; + n = get_node(s, sheaf->node); + if (!n) + goto flush; + + barn = n->barn; /* due to slab_free_hook() */ if (unlikely(sheaf->size == 0)) @@ -6157,11 +6187,12 @@ static void rcu_free_sheaf(struct rcu_head *head) return; } +flush: stat(s, BARN_PUT_FAIL); 
sheaf_flush_unused(s, sheaf); empty: - if (data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) { + if (barn && data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) { barn_put_empty_sheaf(barn, sheaf); return; } @@ -6191,6 +6222,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) } barn = get_barn(s); + if (!barn) { + local_unlock(&s->cpu_sheaves->lock); + goto fail; + } empty = barn_get_empty_sheaf(barn); @@ -6304,6 +6339,8 @@ next_batch: goto do_free; barn = get_barn(s); + if (!barn) + goto no_empty; if (!pcs->spare) { empty = barn_get_empty_sheaf(barn); diff --git a/mm/util.c b/mm/util.c index 6c1d64ed0221..8989d5767528 100644 --- a/mm/util.c +++ b/mm/util.c @@ -566,6 +566,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { + loff_t off = (loff_t)pgoff << PAGE_SHIFT; unsigned long ret; struct mm_struct *mm = current->mm; unsigned long populate; @@ -573,7 +574,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, ret = security_mmap_file(file, prot, flag); if (!ret) - ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len); + ret = fsnotify_mmap_perm(file, prot, off, len); if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; diff --git a/net/core/filter.c b/net/core/filter.c index 5d1838ff1ab9..76628df1fc82 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2281,6 +2281,7 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) goto out_drop; + skb_dst_drop(skb); skb_dst_set(skb, dst); } else if (nh->nh_family != AF_INET6) { goto out_drop; @@ -2389,6 +2390,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, goto out_drop; } + skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index f16f390370dc..1eb8d9f8b104 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -143,14 +143,24 @@ static 
inline bool xp_unused_options_set(u32 options) static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr = desc->addr - pool->tx_metadata_len; - u64 len = desc->len + pool->tx_metadata_len; - u64 offset = addr & (pool->chunk_size - 1); + u64 len = desc->len; + u64 addr, offset; - if (!desc->len) + if (!len) return false; - if (offset + len > pool->chunk_size) + /* Can overflow if desc->addr < pool->tx_metadata_len */ + if (check_sub_overflow(desc->addr, pool->tx_metadata_len, &addr)) + return false; + + offset = addr & (pool->chunk_size - 1); + + /* + * Can't overflow: @offset is guaranteed to be < ``U32_MAX`` + * (pool->chunk_size is ``u32``), @len is guaranteed + * to be <= ``U32_MAX``. + */ + if (offset + len + pool->tx_metadata_len > pool->chunk_size) return false; if (addr >= pool->addrs_cnt) @@ -158,27 +168,42 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, if (xp_unused_options_set(desc->options)) return false; + return true; } static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; - u64 len = desc->len + pool->tx_metadata_len; + u64 len = desc->len; + u64 addr, end; - if (!desc->len) + if (!len) return false; + /* Can't overflow: @len is guaranteed to be <= ``U32_MAX`` */ + len += pool->tx_metadata_len; if (len > pool->chunk_size) return false; - if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || - xp_desc_crosses_non_contig_pg(pool, addr, len)) + /* Can overflow if desc->addr is close to 0 */ + if (check_sub_overflow(xp_unaligned_add_offset_to_addr(desc->addr), + pool->tx_metadata_len, &addr)) + return false; + + if (addr >= pool->addrs_cnt) + return false; + + /* Can overflow if pool->addrs_cnt is high enough */ + if (check_add_overflow(addr, len, &end) || end > pool->addrs_cnt) + return false; + + if (xp_desc_crosses_non_contig_pg(pool, addr, 
len)) return false; if (xp_unused_options_set(desc->options)) return false; + return true; } diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 04b75d4d01c3..2e43c66635a2 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -108,6 +108,7 @@ const xa_mark_t RUST_CONST_HELPER_XA_PRESENT = XA_PRESENT; const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC = XA_FLAGS_ALLOC; const gfp_t RUST_CONST_HELPER_XA_FLAGS_ALLOC1 = XA_FLAGS_ALLOC1; +const vm_flags_t RUST_CONST_HELPER_VM_MERGEABLE = VM_MERGEABLE; #if IS_ENABLED(CONFIG_ANDROID_BINDER_IPC_RUST) #include "../../drivers/android/binder/rust_binder.h" diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 1434cb6208cb..6af392f9cd02 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -223,9 +223,11 @@ KBUILD_USERCFLAGS += -Werror KBUILD_USERLDFLAGS += -Wl,--fatal-warnings KBUILD_RUSTFLAGS += -Dwarnings -endif - -# Hostprog flags are used during build bootstrapping and can not rely on CONFIG_ symbols. +# While hostprog flags are used during build bootstrapping (thus should not +# depend on CONFIG_ symbols), -Werror is disruptive and should be opted into. +# Only apply -Werror to hostprogs built after the initial Kconfig stage. 
KBUILD_HOSTCFLAGS += -Werror KBUILD_HOSTLDFLAGS += -Wl,--fatal-warnings KBUILD_HOSTRUSTFLAGS += -Dwarnings + +endif diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 7c6ae9886f8f..ced4379550d7 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -82,9 +82,12 @@ endif # --------------------------------------------------------------------------- remove-section-y := .modinfo -remove-section-$(CONFIG_ARCH_VMLINUX_NEEDS_RELOCS) += '.rel*' +remove-section-$(CONFIG_ARCH_VMLINUX_NEEDS_RELOCS) += '.rel*' '!.rel*.dyn' +# for compatibility with binutils < 2.32 +# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=c12d9fa2afe7abcbe407a00e15719e1a1350c2a7 +remove-section-$(CONFIG_ARCH_VMLINUX_NEEDS_RELOCS) += '.rel.*' -remove-symbols := -w --strip-symbol='__mod_device_table__*' +remove-symbols := -w --strip-unneeded-symbol='__mod_device_table__*' # To avoid warnings: "empty loadable segment detected at ..." from GNU objcopy, # it is necessary to remove the PT_LOAD flag from the segment. diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index c77dc40f7689..15d113a1bc1d 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -788,6 +788,7 @@ class PrinterHelpersHeader(Printer): 'struct task_struct', 'struct cgroup', 'struct path', + 'const struct path', 'struct btf_ptr', 'struct inode', 'struct socket', diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h index dbe39b44256b..6e1b357c374b 100644 --- a/tools/arch/x86/include/asm/asm.h +++ b/tools/arch/x86/include/asm/asm.h @@ -108,18 +108,6 @@ #endif -/* - * Macros to generate condition code outputs from inline assembly, - * The output operand must be type "bool". 
- */ -#ifdef __GCC_ASM_FLAG_OUTPUTS__ -# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" -# define CC_OUT(c) "=@cc" #c -#else -# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" -# define CC_OUT(c) [_cc_ ## c] "=qm" -#endif - #ifdef __KERNEL__ /* Exception table entry */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ae83d8649ef1..6829936d33f5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4891,7 +4891,7 @@ union bpf_attr { * * **-ENOENT** if the bpf_local_storage cannot be found. * - * long bpf_d_path(struct path *path, char *buf, u32 sz) + * long bpf_d_path(const struct path *path, char *buf, u32 sz) * Description * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. 
The path is diff --git a/tools/lib/bpf/libbpf_utils.c b/tools/lib/bpf/libbpf_utils.c index 5d66bc6ff098..ac3beae54cf6 100644 --- a/tools/lib/bpf/libbpf_utils.c +++ b/tools/lib/bpf/libbpf_utils.c @@ -148,16 +148,20 @@ const char *libbpf_errstr(int err) } } -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpacked" -#pragma GCC diagnostic ignored "-Wattributes" -struct __packed_u32 { __u32 __val; } __attribute__((packed)); -#pragma GCC diagnostic pop - -#define get_unaligned_be32(p) be32_to_cpu((((struct __packed_u32 *)(p))->__val)) -#define put_unaligned_be32(v, p) do { \ - ((struct __packed_u32 *)(p))->__val = cpu_to_be32(v); \ -} while (0) +static inline __u32 get_unaligned_be32(const void *p) +{ + __be32 val; + + memcpy(&val, p, sizeof(val)); + return be32_to_cpu(val); +} + +static inline void put_unaligned_be32(__u32 val, void *p) +{ + __be32 be_val = cpu_to_be32(val); + + memcpy(p, &be_val, sizeof(be_val)); +} #define SHA256_BLOCK_LENGTH 64 #define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a72059fcbc83..a5770570b106 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2392,6 +2392,8 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) { + struct symbol *sym; + switch (type) { case ANNOTYPE_NOENDBR: /* early */ @@ -2433,6 +2435,15 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi insn->dead_end = false; break; + case ANNOTYPE_NOCFI: + sym = insn->sym; + if (!sym) { + ERROR_INSN(insn, "dodgy NOCFI annotation"); + return -1; + } + insn->sym->nocfi = 1; + break; + default: ERROR_INSN(insn, "Unknown annotation type: %d", type); return -1; @@ -3994,6 +4005,37 @@ static int validate_retpoline(struct objtool_file *file) warnings++; } + if (!opts.cfi) + return warnings; + + /* + * kCFI call sites look like: + * + * movl 
$(-0x12345678), %r10d + * addl -4(%r11), %r10d + * jz 1f + * ud2 + * 1: cs call __x86_indirect_thunk_r11 + * + * Verify all indirect calls are kCFI adorned by checking for the + * UD2. Notably, doing __nocfi calls to regular (cfi) functions is + * broken. + */ + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + struct symbol *sym = insn->sym; + + if (sym && (sym->type == STT_NOTYPE || + sym->type == STT_FUNC) && !sym->nocfi) { + struct instruction *prev = + prev_insn_same_sym(file, insn); + + if (!prev || prev->type != INSN_BUG) { + WARN_INSN(insn, "no-cfi indirect call!"); + warnings++; + } + } + } + return warnings; } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 0a2fa3ac0079..df8434d3b744 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -70,6 +70,7 @@ struct symbol { u8 local_label : 1; u8 frame_pointer : 1; u8 ignore : 1; + u8 nocfi : 1; struct list_head pv_target; struct reloc *relocs; struct section *group_sec; diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c index 7e25b0e413f6..e697c20951bc 100644 --- a/tools/perf/bench/find-bit-bench.c +++ b/tools/perf/bench/find-bit-bench.c @@ -37,7 +37,7 @@ static noinline void workload(int val) accumulator++; } -#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) +#if defined(__i386__) || defined(__x86_64__) static bool asm_test_bit(long nr, const unsigned long *addr) { bool oldbit; diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c new file mode 100644 index 000000000000..16bd74be3dbe --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "test_pinning_htab.skel.h" + +static void unpin_map(const char *map_name, const char *pin_path) +{ + struct test_pinning_htab 
*skel; + struct bpf_map *map; + int err; + + skel = test_pinning_htab__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + + map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name")) + goto out; + + err = bpf_map__pin(map, pin_path); + if (!ASSERT_OK(err, "bpf_map__pin")) + goto out; + + err = bpf_map__unpin(map, pin_path); + ASSERT_OK(err, "bpf_map__unpin"); +out: + test_pinning_htab__destroy(skel); +} + +void test_pinning_htab(void) +{ + if (test__start_subtest("timer_prealloc")) + unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc"); + if (test__start_subtest("timer_no_prealloc")) + unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc"); +} diff --git a/tools/testing/selftests/bpf/progs/test_pinning_htab.c b/tools/testing/selftests/bpf/progs/test_pinning_htab.c new file mode 100644 index 000000000000..ae227930c73c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning_htab.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct timer_val { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, struct timer_val); + __uint(max_entries, 1); +} timer_prealloc SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, struct timer_val); + __uint(max_entries, 1); + __uint(map_flags, BPF_F_NO_PREALLOC); +} timer_no_prealloc SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c index 3e2d76ee8050..55398c04290a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c @@ -70,7 +70,7 @@ __success int BPF_PROG(path_d_path_from_file_argument, struct file *file) { int ret; - struct path *path; + const 
struct path *path; /* The f_path member is a path which is embedded directly within a * file. Therefore, a pointer to such embedded members are still diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c index 75e9561ba313..b7296edc6626 100644 --- a/usr/gen_init_cpio.c +++ b/usr/gen_init_cpio.c @@ -112,7 +112,10 @@ static int cpio_trailer(void) push_pad(padlen(offset, 512)) < 0) return -1; - return fsync(outfd); + if (fsync(outfd) < 0 && errno != EINVAL) + return -1; + + return 0; } static int cpio_mkslink(const char *name, const char *target, diff --git a/usr/include/headers_check.pl b/usr/include/headers_check.pl index 21c2fb9520e6..1fbc8785f96e 100755 --- a/usr/include/headers_check.pl +++ b/usr/include/headers_check.pl @@ -155,6 +155,8 @@ sub check_sizetypes if (my $included = ($line =~ /^\s*#\s*include\s+[<"](\S+)[>"]/)[0]) { check_include_typesh($included); } + # strip single-line comments, as types may be referenced within them + $line =~ s@/\*.*?\*/@@; if ($line =~ m/__[us](8|16|32|64)\b/) { printf STDERR "$filename:$lineno: " . "found __[us]{8,16,32,64} type " . |