diff options
Diffstat (limited to 'arch')
53 files changed, 365 insertions, 165 deletions
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index e201b4b1655a..e8c2c7469e10 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -36,6 +36,7 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); void dma_cache_wback_inv(phys_addr_t start, unsigned long sz); void dma_cache_inv(phys_addr_t start, unsigned long sz); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 59baf6c132a7..dcf2df6da98f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -92,6 +92,7 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index aa075d8372ea..74255e819831 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -47,7 +47,10 @@ extern char * strchrnul(const char *, int); #endif #ifdef CONFIG_KERNEL_XZ +/* Prevent KASAN override of string helpers in decompressor */ +#undef memmove #define memmove memmove +#undef memcpy #define memcpy memcpy #include "../../../../lib/decompress_unxz.c" #endif diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts index 8077f1716fbc..ecb91fb899ff 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts @@ -112,7 +112,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 5e56288e343b..e68fb879e4f9 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -290,6 +290,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +void flush_dcache_folio(struct folio *folio); #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 084d1c07c2d0..36fbc3329252 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -176,6 +176,7 @@ extern int __get_user_64t_4(void *); register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ unsigned int __ua_flags = uaccess_save_and_enable(); \ + int __tmp_e; \ switch (sizeof(*(__p))) { \ case 1: \ if (sizeof((x)) >= 8) \ @@ -203,9 +204,10 @@ extern int __get_user_64t_4(void *); break; \ default: __e = __get_user_bad(); break; \ } \ + __tmp_e = __e; \ uaccess_restore(__ua_flags); \ x = (typeof(*(p))) __r2; \ - __e; \ + __tmp_e; \ }) #define get_user(x, p) \ diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 29070eb8df7d..3fc7f9750ce4 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -253,7 +253,7 @@ __create_page_tables: add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ORDER) ldr r6, =(_end - 1) adr_l r5, kernel_sec_start @ _pa(kernel_sec_start) -#ifdef CONFIG_CPU_ENDIAN_BE8 +#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 str r8, [r5, #4] @ Save physical start of kernel (BE) #else str r8, [r5] @ Save physical start of kernel (LE) @@ -266,7 +266,7 @@ __create_page_tables: bls 1b eor r3, r3, r7 @ Remove the MMU flags adr_l r5, kernel_sec_end @ _pa(kernel_sec_end) -#ifdef CONFIG_CPU_ENDIAN_BE8 +#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 str r3, [r5, #4] @ Save physical end of kernel (BE) #else str r3, [r5] @ Save physical end of kernel (LE) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 4a7edc6e848f..195dff58bafc 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -136,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { if (p >= bottom && p < top) { unsigned long val; - if (get_kernel_nofault(val, (unsigned long *)p)) + if (!get_kernel_nofault(val, (unsigned long *)p)) sprintf(str + i * 9, " %08lx", val); else sprintf(str + i * 9, " ????????"); diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 50136828f5b5..f14c2360ea0b 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -40,6 +40,10 @@ SECTIONS ARM_DISCARD *(.alt.smp.init) *(.pv_table) +#ifndef CONFIG_ARM_UNWIND + *(.ARM.exidx) *(.ARM.exidx.*) + *(.ARM.extab) *(.ARM.extab.*) +#endif } . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); @@ -172,7 +176,7 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") #endif -#ifdef CONFIG_ARM_MPU +#if defined(CONFIG_ARM_MPU) && !defined(CONFIG_COMPILE_TEST) /* * Due to PMSAv7 restriction on base address and size we have to * enforce minimal alignment restrictions. It was seen that weaker diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e2c743aa2eb2..d9f7dfe2a7ed 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -340,6 +340,7 @@ ENTRY(\name\()_cache_fns) .macro define_tlb_functions name:req, flags_up:req, flags_smp .type \name\()_tlb_fns, #object + .align 2 ENTRY(\name\()_tlb_fns) .long \name\()_flush_user_tlb_range .long \name\()_flush_kern_tlb_range diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 27e0af78e88b..9d8634e2f12f 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -439,7 +439,7 @@ static struct undef_hook kprobes_arm_break_hook = { #endif /* !CONFIG_THUMB2_KERNEL */ -int __init arch_init_kprobes() +int __init arch_init_kprobes(void) { arm_probes_decode_init(); #ifdef CONFIG_THUMB2_KERNEL diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index 02f8e72f0cad..05486cccee1c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -75,7 +75,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index d17abb515835..e99e7644ff39 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -70,7 +70,9 @@ regulator-name = "rst-usb-eth2"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb_eth2>; - gpio = <&gpio3 2 GPIO_ACTIVE_LOW>; + gpio = <&gpio3 2 GPIO_ACTIVE_HIGH>; + enable-active-high; + regulator-always-on; }; reg_vdd_5v: regulator-5v { @@ -95,7 +97,7 @@ clocks = <&osc_can>; interrupt-parent = <&gpio4>; interrupts = <28 IRQ_TYPE_EDGE_FALLING>; - spi-max-frequency = <100000>; + spi-max-frequency = <10000000>; vdd-supply = <®_vdd_3v3>; xceiver-supply = <®_vdd_5v>; }; @@ -111,7 +113,7 @@ &fec1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-rxid"; phy-handle = <ðphy>; status = "okay"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index 9db9b90bf2bc..42bbbb3f532b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -91,10 +91,12 @@ reg_vdd_soc: BUCK1 { regulator-name = "buck1"; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <850000>; regulator-boot-on; regulator-always-on; regulator-ramp-delay = <3125>; + nxp,dvs-run-voltage = <850000>; + nxp,dvs-standby-voltage = <800000>; }; reg_vdd_arm: BUCK2 { @@ -111,7 +113,7 @@ reg_vdd_dram: BUCK3 { regulator-name = "buck3"; regulator-min-microvolt = <850000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <950000>; regulator-boot-on; regulator-always-on; }; @@ -150,7 +152,7 @@ reg_vdd_snvs: LDO2 { regulator-name = "ldo2"; - regulator-min-microvolt = <850000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 8c15d9fed08f..d12e4cbfc852 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -2590,9 +2590,10 @@ power-domains = <&dispcc MDSS_GDSC>; clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, <&gcc GCC_DISP_SF_AXI_CLK>, <&dispcc DISP_CC_MDSS_MDP_CLK>; - clock-names = "iface", "nrt_bus", "core"; + clock-names = "iface", "bus", "nrt_bus", "core"; assigned-clocks = <&dispcc DISP_CC_MDSS_MDP_CLK>; assigned-clock-rates = <460000000>; diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index fb0f523d1492..0a048dc06a7d 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -24,6 +24,7 @@ struct hyp_pool { /* Allocation */ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); +void hyp_split_page(struct hyp_page *page); void hyp_get_page(struct hyp_pool *pool, void *addr); void hyp_put_page(struct hyp_pool *pool, void *addr); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index bacd493a4eac..34eeb524b686 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -35,7 +35,18 @@ const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { - return hyp_alloc_pages(&host_s2_pool, get_order(size)); + void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size)); + + hyp_split_page(hyp_virt_to_page(addr)); + + /* + * The size of concatenated PGDs is always a power of two of PAGE_SIZE, + * so there should be no need to free any of the tail pages to make the + * allocation exact. + */ + WARN_ON(size != (PAGE_SIZE << get_order(size))); + + return addr; } static void *host_s2_zalloc_page(void *pool) diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 41fc25bdfb34..0bd7701ad1df 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -152,6 +152,7 @@ static inline void hyp_page_ref_inc(struct hyp_page *p) static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) { + BUG_ON(!p->refcount); p->refcount--; return (p->refcount == 0); } @@ -193,6 +194,20 @@ void hyp_get_page(struct hyp_pool *pool, void *addr) hyp_spin_unlock(&pool->lock); } +void hyp_split_page(struct hyp_page *p) +{ + unsigned short order = p->order; + unsigned int i; + + p->order = 0; + for (i = 1; i < (1 << order); i++) { + struct hyp_page *tail = p + i; + + tail->order = 0; + hyp_set_page_refcounted(tail); + } +} + void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) { unsigned short i = order; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1a94a7ca48f2..69bd1732a299 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1529,8 +1529,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * when updating the PG_mte_tagged page flag, see * sanitise_mte_tags for more details. */ - if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) - return -EINVAL; + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) { + ret = -EINVAL; + break; + } if (vma->vm_flags & VM_PFNMAP) { /* IO region dirty page logging not allowed */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41c23f474ea6..803e7773fa86 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1136,6 +1136,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h index 1ac55e7b47f0..8ab46625ddd3 100644 --- a/arch/m68k/include/asm/cacheflush_mm.h +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -250,6 +250,7 @@ static inline void __flush_page_to_ram(void *vaddr) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) __flush_page_to_ram(page_address(page)) +void flush_dcache_folio(struct folio *folio); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page)) diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index b3dc9c589442..f207388541d5 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -61,6 +61,8 @@ static inline void flush_dcache_page(struct page *page) SetPageDcacheDirty(page); } +void flush_dcache_folio(struct folio *folio); + #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index c2a222ebfa2a..3fc0bb7d6487 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -27,6 +27,7 @@ void flush_cache_vunmap(unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, void *src, int len); void copy_from_user_page(struct vm_area_struct *vma, struct page *page, diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 0e23e3a8df6b..d55b73b18149 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -6,7 +6,7 @@ #ifndef CONFIG_DYNAMIC_FTRACE extern void (*ftrace_trace_function)(unsigned long, unsigned long, - struct ftrace_ops*, struct pt_regs*); + struct ftrace_ops*, struct ftrace_regs*); extern void ftrace_graph_caller(void); noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h index 18eb9f69f806..1999561b22aa 100644 --- a/arch/nios2/include/asm/cacheflush.h +++ b/arch/nios2/include/asm/cacheflush.h @@ -28,7 +28,8 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); diff --git a/arch/nios2/include/asm/irqflags.h b/arch/nios2/include/asm/irqflags.h index b3ec3e510706..25acf27862f9 100644 --- a/arch/nios2/include/asm/irqflags.h +++ b/arch/nios2/include/asm/irqflags.h @@ -9,7 +9,7 @@ static inline unsigned long arch_local_save_flags(void) { - return RDCTL(CTL_STATUS); + return RDCTL(CTL_FSTATUS); } /* @@ -18,7 +18,7 @@ static inline unsigned long arch_local_save_flags(void) */ static inline void arch_local_irq_restore(unsigned long flags) { - WRCTL(CTL_STATUS, flags); + WRCTL(CTL_FSTATUS, flags); } static inline void arch_local_irq_disable(void) diff --git a/arch/nios2/include/asm/registers.h b/arch/nios2/include/asm/registers.h index 183c720e454d..95b67dd16f81 100644 --- a/arch/nios2/include/asm/registers.h +++ b/arch/nios2/include/asm/registers.h @@ -11,7 +11,7 @@ #endif /* control register numbers */ -#define CTL_STATUS 0 +#define CTL_FSTATUS 0 #define CTL_ESTATUS 1 #define CTL_BSTATUS 2 #define CTL_IENABLE 3 diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform index 9e32fb7f3d4c..e849daff6fd1 100644 --- a/arch/nios2/platform/Kconfig.platform +++ b/arch/nios2/platform/Kconfig.platform @@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR config NIOS2_DTB_SOURCE_BOOL bool "Compile and link device tree into kernel image" + depends on !COMPILE_TEST help This allows you to specify a dts (device tree source) file which will be compiled and linked into the kernel image. diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index eef0096db5f8..da0cd4b3a28f 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -49,7 +49,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size); #define flush_cache_vunmap(start, end) flush_cache_all() #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index abb719b21cae..3d97fb833834 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -126,14 +126,16 @@ _GLOBAL(idle_return_gpr_loss) /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. - * - * The 0(r1) slot is used to save r2 in isa206, so use that here. + * We have to store a GPR somewhere, ptesync, then reload it, and create + * a false dependency on the result of the load. It doesn't matter which + * GPR we store, or where we store it. We have already stored r2 to the + * stack at -8(r1) in isa206_idle_insn_mayloss, so use that. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r2,0(r1); \ + std r2,-8(r1); \ ptesync; \ - ld r2,0(r1); \ + ld r2,-8(r1); \ 236: cmpd cr0,r2,r2; \ bne 236b; \ IDLE_INST; \ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9cc7d3dbf439..605bab448f84 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1730,8 +1730,6 @@ void __cpu_die(unsigned int cpu) void arch_cpu_idle_dead(void) { - sched_preempt_enable_no_resched(); - /* * Disable on the down path. This will be re-enabled by * start_secondary() via start_secondary_resume() below diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index dab5c56ffd0e..a52af8fbf571 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1302,6 +1302,12 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct property *default_win; int reset_win_ext; + /* DDW + IOMMU on single window may fail if there is any allocation */ + if (iommu_table_in_use(tbl)) { + dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); + goto out_failed; + } + default_win = of_find_property(pdn, "ibm,dma-window", NULL); if (!default_win) goto out_failed; @@ -1356,12 +1362,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) query.largest_available_block, 1ULL << page_shift); - /* DDW + IOMMU on single window may fail if there is any allocation */ - if (default_win_removed && iommu_table_in_use(tbl)) { - dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); - goto out_failed; - } - len = order_base_2(query.largest_available_block << page_shift); win_name = DMA64_PROPNAME; } else { @@ -1411,18 +1411,19 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) } else { struct iommu_table *newtbl; int i; + unsigned long start = 0, end = 0; for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; /* Look for MMIO32 */ - if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) + if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { + start = pci->phb->mem_resources[i].start; + end = pci->phb->mem_resources[i].end; break; + } } - if (i == ARRAY_SIZE(pci->phb->mem_resources)) - goto out_del_list; - /* New table for using DDW instead of the default DMA window */ newtbl = iommu_pseries_alloc_table(pci->phb->node); if (!newtbl) { @@ -1432,15 +1433,15 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start, - pci->phb->mem_resources[i].end); + iommu_init_table(newtbl, pci->phb->node, start, end); pci->table_group->tables[1] = newtbl; /* Keep default DMA window stuct if removed */ if (default_win_removed) { tbl->it_size = 0; - kfree(tbl->it_map); + vfree(tbl->it_map); + tbl->it_map = NULL; } set_iommu_table_base(&dev->dev, newtbl); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6a6fa9e976d5..f076cee11af6 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -163,6 +163,12 @@ config PAGE_OFFSET default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB +config KASAN_SHADOW_OFFSET + hex + depends on KASAN_GENERIC + default 0xdfffffc800000000 if 64BIT + default 0xffffffff if 32BIT + config ARCH_FLATMEM_ENABLE def_bool !NUMA diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index a2b3d9cdbc86..b00f503ec124 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -30,8 +30,7 @@ #define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_SHADOW_START KERN_VIRT_START #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) -#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ - (64 - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) void kasan_init(void); asmlinkage void kasan_early_init(void); diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index fce5184b22c3..52c5ff9804c5 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -193,6 +193,7 @@ setup_trap_vector: csrw CSR_SCRATCH, zero ret +.align 2 .Lsecondary_park: /* We lack SMP support or have too many harts, so park this hart */ wfi diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index d7189c8714a9..54294f83513d 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -17,6 +17,9 @@ asmlinkage void __init kasan_early_init(void) uintptr_t i; pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != + KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); + for (i = 0; i < PTRS_PER_PTE; ++i) set_pte(kasan_early_shadow_pte + i, mk_pte(virt_to_page(kasan_early_shadow_page), @@ -172,21 +175,10 @@ void __init kasan_init(void) phys_addr_t p_start, p_end; u64 i; - /* - * Populate all kernel virtual address space with kasan_early_shadow_page - * except for the linear mapping and the modules/kernel/BPF mapping. - */ - kasan_populate_early_shadow((void *)KASAN_SHADOW_START, - (void *)kasan_mem_to_shadow((void *) - VMEMMAP_END)); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) kasan_shallow_populate( (void *)kasan_mem_to_shadow((void *)VMALLOC_START), (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); - else - kasan_populate_early_shadow( - (void *)kasan_mem_to_shadow((void *)VMALLOC_START), - (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); /* Populate the linear mapping */ for_each_mem_range(i, &p_start, &p_end) { diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index fed86f42dfbe..753d85bdfad0 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -125,7 +125,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (i == NR_JIT_ITERATIONS) { pr_err("bpf-jit: image did not converge in <%d passes!\n", i); - bpf_jit_binary_free(jit_data->header); + if (jit_data->header) + bpf_jit_binary_free(jit_data->header); prog = orig_prog; goto out_offset; } @@ -166,6 +167,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index b9f85b2dc053..6af59c59cc1b 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -894,6 +894,11 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, /** * guest_translate_address - translate guest logical into guest absolute address + * @vcpu: virtual cpu + * @gva: Guest virtual address + * @ar: Access register + * @gpa: Guest physical address + * @mode: Translation access mode * * Parameter semantics are the same as the ones from guest_translate. * The memory contents at the guest address are not changed. @@ -934,6 +939,11 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, /** * check_gva_range - test a range of guest virtual addresses for accessibility + * @vcpu: virtual cpu + * @gva: Guest virtual address + * @ar: Access register + * @length: Length of test range + * @mode: Translation access mode */ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long length, enum gacc_mode mode) @@ -956,6 +966,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, /** * kvm_s390_check_low_addr_prot_real - check for low-address protection + * @vcpu: virtual cpu * @gra: Guest real address * * Checks whether an address is subject to low-address protection and set @@ -979,6 +990,7 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) * @pgt: pointer to the beginning of the page table for the given address if * successful (return value 0), or to the first invalid DAT entry in * case of exceptions (return value > 0) + * @dat_protection: referenced memory is write protected * @fake: pgt references contiguous guest memory block, not a pgtable */ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 72b25b7cc6ae..2bd8f854f1b4 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -269,6 +269,7 @@ static int handle_prog(struct kvm_vcpu *vcpu) /** * handle_external_interrupt - used for external interruption interceptions + * @vcpu: virtual cpu * * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if * the new PSW does not have external interrupts disabled. In the first case, @@ -315,7 +316,8 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) } /** - * Handle MOVE PAGE partial execution interception. + * handle_mvpg_pei - Handle MOVE PAGE partial execution interception. + * @vcpu: virtual cpu * * This interception can only happen for guests with DAT disabled and * addresses that are currently not mapped in the host. Thus we try to diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 10722455fd02..2245f4b8d362 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3053,13 +3053,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; + u8 vcpu_isc_mask; for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; - deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); - if (deliverable_mask) { + vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask & vcpu_isc_mask) { /* lately kicked but not yet running */ if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6a6dd5e1daf6..1c97493d21e1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3363,6 +3363,7 @@ out_free_sie_block: int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); return kvm_s390_vcpu_has_irq(vcpu, 0); } diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index 372afa82fee6..c7a97f32432f 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -42,7 +42,8 @@ extern void flush_cache_page(struct vm_area_struct *vma, extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); extern void flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_user_range flush_icache_range extern void flush_icache_page(struct vm_area_struct *vma, diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index 18d2f5199194..1cc72b4804fa 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -78,7 +78,7 @@ vpxor tmp0, x, x; -.section .rodata.cst164, "aM", @progbits, 164 +.section .rodata.cst16, "aM", @progbits, 16 .align 16 /* @@ -133,6 +133,10 @@ .L0f0f0f0f: .long 0x0f0f0f0f +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .text .align 16 diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S index d2ffd7f76ee2..9c5d3f3ad45a 100644 --- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -93,7 +93,7 @@ vpxor tmp0, x, x; -.section .rodata.cst164, "aM", @progbits, 164 +.section .rodata.cst16, "aM", @progbits, 16 .align 16 /* @@ -148,6 +148,10 @@ .L0f0f0f0f: .long 0x0f0f0f0f +/* 12 bytes, only for padding */ +.Lpadding_deadbeef: + .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .text .align 16 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8f48a7ec577..13f64654dfff 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -702,7 +702,8 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; - void *guest_ins_data; + void *sev_pio_data; + unsigned sev_pio_count; u8 event_exit_inst_len; @@ -1097,7 +1098,7 @@ struct kvm_arch { u64 cur_tsc_generation; int nr_vcpus_matched_tsc; - spinlock_t pvclock_gtod_sync_lock; + raw_spinlock_t pvclock_gtod_sync_lock; bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 76fb00921203..d6ac32f3f650 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2321,13 +2321,14 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic = vcpu->arch.apic; + u64 msr_val; int i; if (!init_event) { - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; + msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + msr_val |= MSR_IA32_APICBASE_BSP; + kvm_lapic_set_base(vcpu, msr_val); } if (!apic) @@ -2336,11 +2337,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); - if (!init_event) { - apic->base_address = APIC_DEFAULT_PHYS_BASE; - + /* The xAPIC ID is set at RESET even if the APIC was already enabled. */ + if (!init_event) kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); - } kvm_apic_set_version(apic->vcpu); for (i = 0; i < KVM_APIC_LVT_NUM; i++) @@ -2481,6 +2480,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) lapic_timer_advance_dynamic = false; } + /* + * Stuff the APIC ENABLE bit in lieu of temporarily incrementing + * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset(). + */ + vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_iodevice_init(&apic->dev, &apic_mmio_ops); @@ -2942,5 +2946,7 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu) void kvm_lapic_exit(void) { static_key_deferred_flush(&apic_hw_disabled); + WARN_ON(static_branch_unlikely(&apic_hw_disabled.key)); static_key_deferred_flush(&apic_sw_disabled); + WARN_ON(static_branch_unlikely(&apic_sw_disabled.key)); } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1a64ba5b9437..0cc58901bf7a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4596,10 +4596,10 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu) unsigned bit; bool wp; - if (!is_cr4_pke(mmu)) { - mmu->pkru_mask = 0; + mmu->pkru_mask = 0; + + if (!is_cr4_pke(mmu)) return; - } wp = is_cr0_wp(mmu); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c36b5fe4c27c..7e34d7163ada 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -618,7 +618,12 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; vmsa.address = __sme_pa(svm->vmsa); vmsa.len = PAGE_SIZE; - return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); + if (ret) + return ret; + + vcpu->arch.guest_state_protected = true; + return 0; } static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) @@ -1479,6 +1484,13 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) goto e_free_trans; } + /* + * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP + * encrypts the written data with the guest's key, and the cache may + * contain dirty, unencrypted data. + */ + sev_clflush_pages(guest_page, n); + /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; data.guest_address |= sev_me_mask; @@ -2579,11 +2591,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { - if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2)) + int count; + int bytes; + + if (svm->vmcb->control.exit_info_2 > INT_MAX) + return -EINVAL; + + count = svm->vmcb->control.exit_info_2; + if (unlikely(check_mul_overflow(count, size, &bytes))) + return -EINVAL; + + if (!setup_vmgexit_scratch(svm, in, bytes)) return -EINVAL; - return kvm_sev_es_string_io(&svm->vcpu, size, port, - svm->ghcb_sa, svm->ghcb_sa_len, in); + return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in); } void sev_es_init_vmcb(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 128a54b1fbf1..5d30db599e10 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -191,7 +191,7 @@ struct vcpu_svm { /* SEV-ES scratch area support */ void *ghcb_sa; - u64 ghcb_sa_len; + u32 ghcb_sa_len; bool ghcb_sa_sync; bool ghcb_sa_free; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 116b08904ac3..7d595effb66f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5562,9 +5562,13 @@ static int handle_encls(struct kvm_vcpu *vcpu) static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu) { - vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; - vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; - return 0; + /* + * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK + * VM-Exits. Unconditionally set the flag here and leave the handling to + * vmx_handle_exit(). + */ + to_vmx(vcpu)->exit_reason.bus_lock_detected = true; + return 1; } /* @@ -6051,9 +6055,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) int ret = __vmx_handle_exit(vcpu, exit_fastpath); /* - * Even when current exit reason is handled by KVM internally, we - * still need to exit to user space when bus lock detected to inform - * that there is a bus lock in guest. + * Exit to user space when bus lock detected to inform that there is + * a bus lock in guest. */ if (to_vmx(vcpu)->exit_reason.bus_lock_detected) { if (ret > 0) @@ -6302,18 +6305,13 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) /* * If we are running L2 and L1 has a new pending interrupt - * which can be injected, we should re-evaluate - * what should be done with this new L1 interrupt. - * If L1 intercepts external-interrupts, we should - * exit from L2 to L1. Otherwise, interrupt should be - * delivered directly to L2. + * which can be injected, this may cause a vmexit or it may + * be injected into L2. Either way, this interrupt will be + * processed via KVM_REQ_EVENT, not RVI, because we do not use + * virtual interrupt delivery to inject L1 interrupts into L2. */ - if (is_guest_mode(vcpu) && max_irr_updated) { - if (nested_exit_on_intr(vcpu)) - kvm_vcpu_exiting_guest_mode(vcpu); - else - kvm_make_request(KVM_REQ_EVENT, vcpu); - } + if (is_guest_mode(vcpu) && max_irr_updated) + kvm_make_request(KVM_REQ_EVENT, vcpu); } else { max_irr = kvm_lapic_find_highest_irr(vcpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index aabd3a2ec1bc..bfe0de3008a6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) kvm_vcpu_write_tsc_offset(vcpu, offset); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); - spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags); if (!matched) { kvm->arch.nr_vcpus_matched_tsc = 0; } else if (!already_matched) { @@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) } kvm_track_tsc_matching(vcpu); - spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags); } static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, @@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) kvm_make_mclock_inprogress_request(kvm); /* no guest entries from this point */ - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); pvclock_update_vm_gtod_copy(kvm); - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm) unsigned long flags; u64 ret; - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); if (!ka->use_master_clock) { - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); return get_kvmclock_base_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); @@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) * If the host uses TSC clock, then passthrough TSC as stable * to the guest. */ - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); use_master_clock = ka->use_master_clock; if (use_master_clock) { host_tsc = ka->master_cycle_now; kernel_ns = ka->master_kernel_ns; } - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); @@ -6100,13 +6100,13 @@ set_pit2_out: * is slightly ahead) here we risk going negative on unsigned * 'system_time' when 'user_ns.clock' is very small. */ - spin_lock_irq(&ka->pvclock_gtod_sync_lock); + raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock); if (kvm->arch.use_master_clock) now_ns = ka->master_kernel_ns; else now_ns = get_kvmclock_base_ns(); ka->kvmclock_offset = user_ns.clock - now_ns; - spin_unlock_irq(&ka->pvclock_gtod_sync_lock); + raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock); kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); break; @@ -6906,7 +6906,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) } static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val, + unsigned short port, unsigned int count, bool in) { vcpu->arch.pio.port = port; @@ -6914,10 +6914,8 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, vcpu->arch.pio.count = count; vcpu->arch.pio.size = size; - if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { - vcpu->arch.pio.count = 0; + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) return 1; - } vcpu->run->exit_reason = KVM_EXIT_IO; vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; @@ -6929,26 +6927,39 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, return 0; } -static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val, unsigned int count) +static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, unsigned int count) { - int ret; + WARN_ON(vcpu->arch.pio.count); + memset(vcpu->arch.pio_data, 0, size * count); + return emulator_pio_in_out(vcpu, size, port, count, true); +} - if (vcpu->arch.pio.count) - goto data_avail; +static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val) +{ + int size = vcpu->arch.pio.size; + unsigned count = vcpu->arch.pio.count; + memcpy(val, vcpu->arch.pio_data, size * count); + trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data); + vcpu->arch.pio.count = 0; +} - memset(vcpu->arch.pio_data, 0, size * count); +static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, void *val, unsigned int count) +{ + if (vcpu->arch.pio.count) { + /* Complete previous iteration. */ + } else { + int r = __emulator_pio_in(vcpu, size, port, count); + if (!r) + return r; - ret = emulator_pio_in_out(vcpu, size, port, val, count, true); - if (ret) { -data_avail: - memcpy(val, vcpu->arch.pio_data, size * count); - trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data); - vcpu->arch.pio.count = 0; - return 1; + /* Results already available, fall through. */ } - return 0; + WARN_ON(count != vcpu->arch.pio.count); + complete_emulator_pio_in(vcpu, val); + return 1; } static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, @@ -6963,9 +6974,15 @@ static int emulator_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port, const void *val, unsigned int count) { + int ret; + memcpy(vcpu->arch.pio_data, val, size * count); trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data); - return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); + ret = emulator_pio_in_out(vcpu, size, port, count, false); + if (ret) + vcpu->arch.pio.count = 0; + + return ret; } static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, @@ -8139,9 +8156,9 @@ static void kvm_hyperv_tsc_notifier(void) list_for_each_entry(kvm, &vm_list, vm_list) { struct kvm_arch *ka = &kvm->arch; - spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); pvclock_update_vm_gtod_copy(kvm); - spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); + raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); kvm_for_each_vcpu(cpu, vcpu, kvm) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -8783,9 +8800,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); + + /* + * The call to kvm_ready_for_interrupt_injection() may end up in + * kvm_xen_has_interrupt() which may require the srcu lock to be + * held, to protect against changes in the vcpu_info address. + */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || kvm_vcpu_ready_for_interrupt_injection(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (is_smm(vcpu)) kvm_run->flags |= KVM_RUN_X86_SMM; @@ -9643,14 +9668,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) break; - if (unlikely(kvm_vcpu_exit_request(vcpu))) { + if (vcpu->arch.apicv_active) + static_call(kvm_x86_sync_pir_to_irr)(vcpu); + + if (unlikely(kvm_vcpu_exit_request(vcpu))) { exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; break; } - - if (vcpu->arch.apicv_active) - static_call(kvm_x86_sync_pir_to_irr)(vcpu); - } + } /* * Do this here before restoring debug registers on the host. And @@ -11182,7 +11207,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_lock_init(&kvm->arch.tsc_write_lock); mutex_init(&kvm->arch.apic_map_lock); - spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); + raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); pvclock_update_vm_gtod_copy(kvm); @@ -11392,7 +11417,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, int level = i + 1; int lpages = __kvm_mmu_slot_lpages(slot, npages, level); - WARN_ON(slot->arch.rmap[i]); + if (slot->arch.rmap[i]) + continue; slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT); if (!slot->arch.rmap[i]) { @@ -12367,44 +12393,81 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, } EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); -static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port); + +static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu) { - memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data, - vcpu->arch.pio.count * vcpu->arch.pio.size); - vcpu->arch.pio.count = 0; + int size = vcpu->arch.pio.size; + int port = vcpu->arch.pio.port; + vcpu->arch.pio.count = 0; + if (vcpu->arch.sev_pio_count) + return kvm_sev_es_outs(vcpu, size, port); return 1; } static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, void *data, unsigned int count) + unsigned int port) { - int ret; - - ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port, - data, count); - if (ret) - return ret; + for (;;) { + unsigned int count = + min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count); + int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count); + + /* memcpy done already by emulator_pio_out. */ + vcpu->arch.sev_pio_count -= count; + vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size; + if (!ret) + break; - vcpu->arch.pio.count = 0; + /* Emulation done by the kernel. */ + if (!vcpu->arch.sev_pio_count) + return 1; + } + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs; return 0; } static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, void *data, unsigned int count) + unsigned int port); + +static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu) { - int ret; + unsigned count = vcpu->arch.pio.count; + complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data); + vcpu->arch.sev_pio_count -= count; + vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size; +} - ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port, - data, count); - if (ret) { - vcpu->arch.pio.count = 0; - } else { - vcpu->arch.guest_ins_data = data; - vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; +static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +{ + int size = vcpu->arch.pio.size; + int port = vcpu->arch.pio.port; + + advance_sev_es_emulated_ins(vcpu); + if (vcpu->arch.sev_pio_count) + return kvm_sev_es_ins(vcpu, size, port); + return 1; +} + +static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port) +{ + for (;;) { + unsigned int count = + min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count); + if (!__emulator_pio_in(vcpu, size, port, count)) + break; + + /* Emulation done by the kernel. */ + advance_sev_es_emulated_ins(vcpu); + if (!vcpu->arch.sev_pio_count) + return 1; } + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; return 0; } @@ -12412,8 +12475,10 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, void *data, unsigned int count, int in) { - return in ? kvm_sev_es_ins(vcpu, size, port, data, count) - : kvm_sev_es_outs(vcpu, size, port, data, count); + vcpu->arch.sev_pio_data = data; + vcpu->arch.sev_pio_count = count; + return in ? kvm_sev_es_ins(vcpu, size, port) + : kvm_sev_es_outs(vcpu, size, port); } EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9ea9c3dabe37..8f62baebd028 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -190,6 +190,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) int __kvm_xen_has_interrupt(struct kvm_vcpu *v) { + int err; u8 rc = 0; /* @@ -216,13 +217,29 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) if (likely(slots->generation == ghc->generation && !kvm_is_error_hva(ghc->hva) && ghc->memslot)) { /* Fast path */ - __get_user(rc, (u8 __user *)ghc->hva + offset); - } else { - /* Slow path */ - kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset, - sizeof(rc)); + pagefault_disable(); + err = __get_user(rc, (u8 __user *)ghc->hva + offset); + pagefault_enable(); + if (!err) + return rc; } + /* Slow path */ + + /* + * This function gets called from kvm_vcpu_block() after setting the + * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately + * from a HLT. So we really mustn't sleep. If the page ended up absent + * at that point, just return 1 in order to trigger an immediate wake, + * and we'll end up getting called again from a context where we *can* + * fault in the page and wait for it. + */ + if (in_atomic() || !task_is_running(current)) + return 1; + + kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset, + sizeof(rc)); + return rc; } diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index cf907e5bf2f2..a8a041609c5d 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -120,7 +120,8 @@ void flush_cache_page(struct vm_area_struct*, #define flush_cache_vunmap(start,end) flush_cache_all() #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page*); +void flush_dcache_page(struct page *); +void flush_dcache_folio(struct folio *); void local_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -137,7 +138,9 @@ void local_flush_cache_page(struct vm_area_struct *vma, #define flush_cache_vunmap(start,end) do { } while (0) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO #define flush_dcache_page(page) do { } while (0) +static inline void flush_dcache_folio(struct folio *folio) { } #define flush_icache_range local_flush_icache_range #define flush_cache_page(vma, addr, pfn) do { } while (0) |