diff options
Diffstat (limited to 'arch')
272 files changed, 3945 insertions, 1810 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 3162db540ee9..1b0483c51cc1 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -49,7 +49,6 @@ config ARC select OF select OF_EARLY_FLATTREE select PCI_SYSCALL if PCI - select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select TRACE_IRQFLAGS_SUPPORT @@ -232,10 +231,6 @@ config ARC_CACHE_PAGES Note that Global I/D ENABLE + Per Page DISABLE works but corollary Global DISABLE + Per Page ENABLE won't work -config ARC_CACHE_VIPT_ALIASING - bool "Support VIPT Aliasing D$" - depends on ARC_HAS_DCACHE && ISA_ARCOMPACT - endif #ARC_CACHE config ARC_HAS_ICCM diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index bd5b1a9a0544..563af3e75f01 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -44,31 +44,10 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz); #define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */ -#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING - #define flush_cache_mm(mm) /* called on munmap/exit */ #define flush_cache_range(mm, u_vstart, u_vend) #define flush_cache_page(vma, u_vaddr, pfn) /* PF handling/COW-break */ -#else /* VIPT aliasing dcache */ - -/* To clear out stale userspace mappings */ -void flush_cache_mm(struct mm_struct *mm); -void flush_cache_range(struct vm_area_struct *vma, - unsigned long start,unsigned long end); -void flush_cache_page(struct vm_area_struct *vma, - unsigned long user_addr, unsigned long page); - -/* - * To make sure that userspace mapping is flushed to memory before - * get_user_pages() uses a kernel mapping to access the page - */ -#define ARCH_HAS_FLUSH_ANON_PAGE -void flush_anon_page(struct vm_area_struct *vma, - struct page *page, unsigned long u_vaddr); - -#endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */ - /* * A new pagecache page has PG_arch_1 clear - thus dcache dirty by default * This works around some PIO based drivers which don't call flush_dcache_page @@ -76,28 +55,6 @@ void flush_anon_page(struct vm_area_struct *vma, */ #define PG_dc_clean PG_arch_1 -#define CACHE_COLORS_NUM 4 -#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1) -#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK) - -/* - * Simple wrapper over config option - * Bootup code ensures that hardware matches kernel configuration - */ -static inline int cache_is_vipt_aliasing(void) -{ - return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); -} - -/* - * checks if two addresses (after page aligning) index into same cache set - */ -#define addr_not_cache_congruent(addr1, addr2) \ -({ \ - cache_is_vipt_aliasing() ? \ - (CACHE_COLOR(addr1) != CACHE_COLOR(addr2)) : 0; \ -}) - #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ memcpy(dst, src, len); \ diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 4d13320e0c1b..3802a2daaf86 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -291,4 +291,36 @@ /* M = 8-1 N = 8 */ .endm +.macro SAVE_ABI_CALLEE_REGS + push r13 + push r14 + push r15 + push r16 + push r17 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 +.endm + +.macro RESTORE_ABI_CALLEE_REGS + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 +.endm + #endif diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index a0e760eb35a8..92c3e9f13252 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -33,6 +33,91 @@ #include <asm/irqflags-compact.h> #include <asm/thread_info.h> /* For THREAD_SIZE */ +/* Note on the LD/ST addr modes with addr reg wback + * + * LD.a same as LD.aw + * + * LD.a reg1, [reg2, x] => Pre Incr + * Eff Addr for load = [reg2 + x] + * + * LD.ab reg1, [reg2, x] => Post Incr + * Eff Addr for load = [reg2] + */ + +.macro PUSHAX aux + lr r9, [\aux] + push r9 +.endm + +.macro POPAX aux + pop r9 + sr r9, [\aux] +.endm + +.macro SAVE_R0_TO_R12 + push r0 + push r1 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 +.endm + +.macro RESTORE_R12_TO_R0 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r1 + pop r0 +.endm + +.macro SAVE_ABI_CALLEE_REGS + push r13 + push r14 + push r15 + push r16 + push r17 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 +.endm + +.macro RESTORE_ABI_CALLEE_REGS + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 +.endm + /*-------------------------------------------------------------- * Switch to Kernel Mode stack if SP points to User Mode stack * @@ -235,7 +320,7 @@ SWITCH_TO_KERNEL_STK - PUSH 0x003\LVL\()abcd /* Dummy ECR */ + st.a 0x003\LVL\()abcd, [sp, -4] /* Dummy ECR */ sub sp, sp, 8 /* skip orig_r0 (not needed) skip pt_regs->sp, already saved above */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 49c2e090cb5c..cf1ba376e992 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -21,114 +21,12 @@ #include <asm/entry-arcv2.h> #endif -/* Note on the LD/ST addr modes with addr reg wback - * - * LD.a same as LD.aw - * - * LD.a reg1, [reg2, x] => Pre Incr - * Eff Addr for load = [reg2 + x] - * - * LD.ab reg1, [reg2, x] => Post Incr - * Eff Addr for load = [reg2] - */ - -.macro PUSH reg - st.a \reg, [sp, -4] -.endm - -.macro PUSHAX aux - lr r9, [\aux] - PUSH r9 -.endm - -.macro POP reg - ld.ab \reg, [sp, 4] -.endm - -.macro POPAX aux - POP r9 - sr r9, [\aux] -.endm - -/*-------------------------------------------------------------- - * Helpers to save/restore Scratch Regs: - * used by Interrupt/Exception Prologue/Epilogue - *-------------------------------------------------------------*/ -.macro SAVE_R0_TO_R12 - PUSH r0 - PUSH r1 - PUSH r2 - PUSH r3 - PUSH r4 - PUSH r5 - PUSH r6 - PUSH r7 - PUSH r8 - PUSH r9 - PUSH r10 - PUSH r11 - PUSH r12 -.endm - -.macro RESTORE_R12_TO_R0 - POP r12 - POP r11 - POP r10 - POP r9 - POP r8 - POP r7 - POP r6 - POP r5 - POP r4 - POP r3 - POP r2 - POP r1 - POP r0 - -.endm - -/*-------------------------------------------------------------- - * Helpers to save/restore callee-saved regs: - * used by several macros below - *-------------------------------------------------------------*/ -.macro SAVE_R13_TO_R25 - PUSH r13 - PUSH r14 - PUSH r15 - PUSH r16 - PUSH r17 - PUSH r18 - PUSH r19 - PUSH r20 - PUSH r21 - PUSH r22 - PUSH r23 - PUSH r24 - PUSH r25 -.endm - -.macro RESTORE_R25_TO_R13 - POP r25 - POP r24 - POP r23 - POP r22 - POP r21 - POP r20 - POP r19 - POP r18 - POP r17 - POP r16 - POP r15 - POP r14 - POP r13 -.endm - /* * save user mode callee regs as struct callee_regs * - needed by fork/do_signal/unaligned-access-emulation. */ .macro SAVE_CALLEE_SAVED_USER - SAVE_R13_TO_R25 + SAVE_ABI_CALLEE_REGS .endm /* @@ -136,18 +34,18 @@ * - could have been changed by ptrace tracer or unaligned-access fixup */ .macro RESTORE_CALLEE_SAVED_USER - RESTORE_R25_TO_R13 + RESTORE_ABI_CALLEE_REGS .endm /* * save/restore kernel mode callee regs at the time of context switch */ .macro SAVE_CALLEE_SAVED_KERNEL - SAVE_R13_TO_R25 + SAVE_ABI_CALLEE_REGS .endm .macro RESTORE_CALLEE_SAVED_KERNEL - RESTORE_R25_TO_R13 + RESTORE_ABI_CALLEE_REGS .endm /*-------------------------------------------------------------- diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index ef8d4166370c..8a2441670a8f 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -10,6 +10,13 @@ #include <linux/types.h> #include <asm-generic/pgtable-nopmd.h> +/* + * Hugetlb definitions. + */ +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 4a2b30fb5a98..00b9318e551e 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -54,6 +54,10 @@ struct pt_regs { ecr_reg ecr; }; +struct callee_regs { + unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; +}; + #define MAX_REG_OFFSET offsetof(struct pt_regs, ecr) #else @@ -92,16 +96,14 @@ struct pt_regs { unsigned long status32; }; -#define MAX_REG_OFFSET offsetof(struct pt_regs, status32) - -#endif - -/* Callee saved registers - need to be saved only when you are scheduled out */ - struct callee_regs { unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; +#define MAX_REG_OFFSET offsetof(struct pt_regs, status32) + +#endif + #define instruction_pointer(regs) ((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 4dcf8589b708..d08a5092c2b4 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -153,7 +153,7 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) { int n = 0; #ifdef CONFIG_ISA_ARCV2 - const char *release, *cpu_nm, *isa_nm = "ARCv2"; + const char *release = "", *cpu_nm = "HS38", *isa_nm = "ARCv2"; int dual_issue = 0, dual_enb = 0, mpy_opt, present; int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk; char mpy_nm[16], lpb_nm[32]; @@ -172,8 +172,6 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) * releases only update it. */ - cpu_nm = "HS38"; - if (info->arcver > 0x50 && info->arcver <= 0x53) { release = arc_hs_rel[info->arcver - 0x51].str; } else { diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 0b3bb529d246..8f6f4a542964 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -62,7 +62,7 @@ struct rt_sigframe { unsigned int sigret_magic; }; -static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +static int save_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs) { int err = 0; #ifndef CONFIG_ISA_ARCOMPACT @@ -75,12 +75,12 @@ static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) #else v2abi.r58 = v2abi.r59 = 0; #endif - err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); + err = __copy_to_user(&mctx->v2abi, (void const *)&v2abi, sizeof(v2abi)); #endif return err; } -static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +static int restore_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs) { int err = 0; #ifndef CONFIG_ISA_ARCOMPACT diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index f7e05c146637..9106ceac323c 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -145,10 +145,9 @@ dc_chk: p_dc->sz_k = 1 << (dbcr.sz - 1); n += scnprintf(buf + n, len - n, - "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", + "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s\n", p_dc->sz_k, assoc, p_dc->line_len, vipt ? "VIPT" : "PIPT", - p_dc->colors > 1 ? " aliasing" : "", IS_USED_CFG(CONFIG_ARC_HAS_DCACHE)); slc_chk: @@ -703,51 +702,10 @@ static inline void arc_slc_enable(void) * Exported APIs */ -/* - * Handle cache congruency of kernel and userspace mappings of page when kernel - * writes-to/reads-from - * - * The idea is to defer flushing of kernel mapping after a WRITE, possible if: - * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent - * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) - * -In SMP, if hardware caches are coherent - * - * There's a corollary case, where kernel READs from a userspace mapped page. - * If the U-mapping is not congruent to K-mapping, former needs flushing. - */ void flush_dcache_folio(struct folio *folio) { - struct address_space *mapping; - - if (!cache_is_vipt_aliasing()) { - clear_bit(PG_dc_clean, &folio->flags); - return; - } - - /* don't handle anon pages here */ - mapping = folio_flush_mapping(folio); - if (!mapping) - return; - - /* - * pagecache page, file not yet mapped to userspace - * Make a note that K-mapping is dirty - */ - if (!mapping_mapped(mapping)) { - clear_bit(PG_dc_clean, &folio->flags); - } else if (folio_mapped(folio)) { - /* kernel reading from page with U-mapping */ - phys_addr_t paddr = (unsigned long)folio_address(folio); - unsigned long vaddr = folio_pos(folio); - - /* - * vaddr is not actually the virtual address, but is - * congruent to every user mapping. - */ - if (addr_not_cache_congruent(paddr, vaddr)) - __flush_dcache_pages(paddr, vaddr, - folio_nr_pages(folio)); - } + clear_bit(PG_dc_clean, &folio->flags); + return; } EXPORT_SYMBOL(flush_dcache_folio); @@ -921,44 +879,6 @@ noinline void flush_cache_all(void) } -#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING - -void flush_cache_mm(struct mm_struct *mm) -{ - flush_cache_all(); -} - -void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, - unsigned long pfn) -{ - phys_addr_t paddr = pfn << PAGE_SHIFT; - - u_vaddr &= PAGE_MASK; - - __flush_dcache_pages(paddr, u_vaddr, 1); - - if (vma->vm_flags & VM_EXEC) - __inv_icache_pages(paddr, u_vaddr, 1); -} - -void flush_cache_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_cache_all(); -} - -void flush_anon_page(struct vm_area_struct *vma, struct page *page, - unsigned long u_vaddr) -{ - /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_pages((phys_addr_t)page_address(page), u_vaddr, 1); - __flush_dcache_pages((phys_addr_t)page_address(page), - (phys_addr_t)page_address(page), 1); - -} - -#endif - void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { @@ -966,46 +886,11 @@ void copy_user_highpage(struct page *to, struct page *from, struct folio *dst = page_folio(to); void *kfrom = kmap_atomic(from); void *kto = kmap_atomic(to); - int clean_src_k_mappings = 0; - - /* - * If SRC page was already mapped in userspace AND it's U-mapping is - * not congruent with K-mapping, sync former to physical page so that - * K-mapping in memcpy below, sees the right data - * - * Note that while @u_vaddr refers to DST page's userspace vaddr, it is - * equally valid for SRC page as well - * - * For !VIPT cache, all of this gets compiled out as - * addr_not_cache_congruent() is 0 - */ - if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_pages((unsigned long)kfrom, u_vaddr, 1); - clean_src_k_mappings = 1; - } copy_page(kto, kfrom); - /* - * Mark DST page K-mapping as dirty for a later finalization by - * update_mmu_cache(). Although the finalization could have been done - * here as well (given that both vaddr/paddr are available). - * But update_mmu_cache() already has code to do that for other - * non copied user pages (e.g. read faults which wire in pagecache page - * directly). - */ clear_bit(PG_dc_clean, &dst->flags); - - /* - * if SRC was already usermapped and non-congruent to kernel mapping - * sync the kernel mapping back to physical page - */ - if (clean_src_k_mappings) { - __flush_dcache_pages((unsigned long)kfrom, - (unsigned long)kfrom, 1); - } else { - clear_bit(PG_dc_clean, &src->flags); - } + clear_bit(PG_dc_clean, &src->flags); kunmap_atomic(kto); kunmap_atomic(kfrom); @@ -1140,17 +1025,8 @@ static noinline void __init arc_cache_init_master(void) dc->line_len, L1_CACHE_BYTES); /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ - if (is_isa_arcompact()) { - int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - - if (dc->colors > 1) { - if (!handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - if (CACHE_COLORS_NUM != dc->colors) - panic("CACHE_COLORS_NUM not optimized for config\n"); - } else if (handled && dc->colors == 1) { - panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - } + if (is_isa_arcompact() && dc->colors > 1) { + panic("Aliasing VIPT cache not supported\n"); } } diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index fce5fa2b4f52..3c1c7ae73292 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -14,10 +14,6 @@ #include <asm/cacheflush.h> -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ - (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) - /* * Ensure that shared mappings are correctly aligned to * avoid aliasing issues with VIPT caches. @@ -31,21 +27,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - int do_align = 0; - int aliasing = cache_is_vipt_aliasing(); struct vm_unmapped_area_info info; /* - * We only need to do colour alignment if D cache aliases. - */ - if (aliasing) - do_align = filp || (flags & MAP_SHARED); - - /* * We enforce the MAP_FIXED case. */ if (flags & MAP_FIXED) { - if (aliasing && flags & MAP_SHARED && + if (flags & MAP_SHARED && (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; @@ -55,10 +43,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return -ENOMEM; if (addr) { - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); + addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && @@ -70,7 +55,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.length = len; info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; - info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; return vm_unmapped_area(&info); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index e536b2dcd4b0..ad702b49aeb3 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -478,21 +478,15 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, create_tlb(vma, vaddr, ptep); - if (page == ZERO_PAGE(0)) { + if (page == ZERO_PAGE(0)) return; - } /* - * Exec page : Independent of aliasing/page-color considerations, - * since icache doesn't snoop dcache on ARC, any dirty - * K-mapping of a code page needs to be wback+inv so that - * icache fetch by userspace sees code correctly. - * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it - * so userspace sees the right data. - * (Avoids the flush for Non-exec + congruent mapping case) + * For executable pages, since icache doesn't snoop dcache, any + * dirty K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. */ - if ((vma->vm_flags & VM_EXEC) || - addr_not_cache_congruent(paddr, vaddr)) { + if (vma->vm_flags & VM_EXEC) { struct folio *folio = page_folio(page); int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags); if (dirty) { diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts index 1ab8184302db..5a2869a18bd5 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts @@ -36,9 +36,7 @@ gpios = <&gpio 42 GPIO_ACTIVE_HIGH>; }; -&leds { - /delete-node/ led_act; -}; +/delete-node/ &led_act; &pm { /delete-property/ system-power-controller; diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts index a3f247c722b4..0342a79ccd5d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts @@ -37,9 +37,9 @@ &clks { assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, <&clks IMX6QDL_CLK_ENET_REF_SEL>; assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>; + <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, <&clk50m_phy>; }; &hdmi { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi index 4ffe99ed55ca..07dcecbe485d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi @@ -121,6 +121,8 @@ max-speed = <100>; interrupt-parent = <&gpio5>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; }; }; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi index 29b8fd03567a..5387da8a2a0a 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi @@ -454,7 +454,7 @@ }; gpt1: timer@302d0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302d0000 0x10000>; interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_GPT1_ROOT_CLK>, @@ -463,7 +463,7 @@ }; gpt2: timer@302e0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302e0000 0x10000>; interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_GPT2_ROOT_CLK>, @@ -473,7 +473,7 @@ }; gpt3: timer@302f0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302f0000 0x10000>; interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_GPT3_ROOT_CLK>, @@ -483,7 +483,7 @@ }; gpt4: timer@30300000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x30300000 0x10000>; interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_GPT4_ROOT_CLK>, diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts index a400c108f66a..6c5e6856648a 100644 --- a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts +++ b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts @@ -8,6 +8,7 @@ #include "imx28-lwe.dtsi" / { + model = "Liebherr XEA board"; compatible = "lwn,imx28-xea", "fsl,imx28"; }; diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index 7bf557c99561..01edf244ddee 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -848,7 +848,7 @@ }; sdmmc_pwren: sdmmc-pwren { - rockchip,pins = <1 RK_PB6 1 &pcfg_pull_default>; + rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_default>; }; sdmmc_bus4: sdmmc-bus4 { diff --git a/arch/arm/boot/dts/rockchip/rk322x.dtsi b/arch/arm/boot/dts/rockchip/rk322x.dtsi index ffc16d6b97e1..a721744cbfd1 100644 --- a/arch/arm/boot/dts/rockchip/rk322x.dtsi +++ b/arch/arm/boot/dts/rockchip/rk322x.dtsi @@ -215,9 +215,9 @@ power-domain@RK3228_PD_VOP { reg = <RK3228_PD_VOP>; - clocks =<&cru ACLK_VOP>, - <&cru DCLK_VOP>, - <&cru HCLK_VOP>; + clocks = <&cru ACLK_VOP>, + <&cru DCLK_VOP>, + <&cru HCLK_VOP>; pm_qos = <&qos_vop>; #power-domain-cells = <0>; }; diff --git a/arch/arm/boot/dts/ti/omap/am33xx.dtsi b/arch/arm/boot/dts/ti/omap/am33xx.dtsi index 1a2cd5baf402..5b9e01a8aa5d 100644 --- a/arch/arm/boot/dts/ti/omap/am33xx.dtsi +++ b/arch/arm/boot/dts/ti/omap/am33xx.dtsi @@ -359,6 +359,7 @@ <SYSC_IDLE_NO>, <SYSC_IDLE_SMART>, <SYSC_IDLE_SMART_WKUP>; + ti,sysc-delay-us = <2>; clocks = <&l3s_clkctrl AM3_L3S_USB_OTG_HS_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/ti/omap/dra7.dtsi b/arch/arm/boot/dts/ti/omap/dra7.dtsi index 3f3e52e3b375..6509c742fb58 100644 --- a/arch/arm/boot/dts/ti/omap/dra7.dtsi +++ b/arch/arm/boot/dts/ti/omap/dra7.dtsi @@ -147,7 +147,7 @@ l3-noc@44000000 { compatible = "ti,dra7-l3-noc"; - reg = <0x44000000 0x1000>, + reg = <0x44000000 0x1000000>, <0x45000000 0x1000>; interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, <&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index e62832dcba76..a8287e7ab9d4 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -2,8 +2,6 @@ #ifndef _ARM_KEXEC_H #define _ARM_KEXEC_H -#ifdef CONFIG_KEXEC - /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ @@ -82,6 +80,4 @@ static inline struct page *boot_pfn_to_page(unsigned long boot_pfn) #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ - #endif /* _ARM_KEXEC_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index d53f56d6f840..771264d4726a 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -59,7 +59,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o # Main staffs in KPROBES are in arch/arm/probes/ . obj-$(CONFIG_KPROBES) += patch.o insn.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 2157493b78a9..df69af932375 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -501,6 +501,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "mmdc%d", ret); + if (!name) { + ret = -ENOMEM; + goto pmu_release_id; + } pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data; @@ -523,9 +527,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b pmu_register_err: pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret); - ida_simple_remove(&mmdc_ida, pmu_mmdc->id); cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); hrtimer_cancel(&pmu_mmdc->hrtimer); +pmu_release_id: + ida_simple_remove(&mmdc_ida, pmu_mmdc->id); pmu_free: kfree(pmu_mmdc); return ret; diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 98999aa8cc0c..7f387706368a 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -793,11 +793,16 @@ void __init omap_soc_device_init(void) soc_dev_attr->machine = soc_name; soc_dev_attr->family = omap_get_family(); + if (!soc_dev_attr->family) { + kfree(soc_dev_attr); + return; + } soc_dev_attr->revision = soc_rev; soc_dev_attr->custom_attr_group = omap_soc_groups[0]; soc_dev = soc_device_register(soc_dev_attr); if (IS_ERR(soc_dev)) { + kfree(soc_dev_attr->family); kfree(soc_dev_attr); return; } diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 9afdc4c4a5dc..a395b6c0aae2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -484,7 +484,8 @@ static int __init xen_guest_init(void) * for secondary CPUs as they are brought up. * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. */ - xen_vcpu_info = alloc_percpu(struct vcpu_info); + xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info), + 1 << fls(sizeof(struct vcpu_info) - 1)); if (xen_vcpu_info == NULL) return -ENOMEM; diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4bd85cc0d32b..9a2d3723cd0f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -158,7 +158,7 @@ endif all: $(notdir $(KBUILD_IMAGE)) - +vmlinuz.efi: Image Image vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi index 15290e6892fc..fc7315b94406 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi @@ -68,10 +68,7 @@ &emac0 { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; - allwinner,rx-delay-ps = <3100>; - allwinner,tx-delay-ps = <700>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts index d83852e72f06..b5d713926a34 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts @@ -13,6 +13,9 @@ }; &emac0 { + allwinner,rx-delay-ps = <3100>; + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii"; phy-supply = <®_dcdce>; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts index 00fe28caac93..b3b1b8692125 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts @@ -13,6 +13,8 @@ }; &emac0 { + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii-rxid"; phy-supply = <®_dldo1>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi index 5ce5fbf2b38e..f69b0c17560a 100644 --- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi @@ -82,12 +82,9 @@ pinctrl-0 = <&pinctrl_wifi_pdn>; gpio = <&lsio_gpio1 28 GPIO_ACTIVE_HIGH>; enable-active-high; + regulator-always-on; regulator-name = "wifi_pwrdn_fake_regulator"; regulator-settling-time-us = <100>; - - regulator-state-mem { - regulator-off-in-suspend; - }; }; reg_pcie_switch: regulator-pcie-switch { diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index ce66d30a4839..b0bb77150adc 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -149,7 +149,7 @@ dma_subsys: bus@5a000000 { clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; power-domains = <&pd IMX_SC_R_LCD_0_PWM_0>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 49ad3413db94..7e510b21bbac 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -29,7 +29,7 @@ lsio_subsys: bus@5d000000 { <&pwm0_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; @@ -42,7 +42,7 @@ lsio_subsys: bus@5d000000 { <&pwm1_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; @@ -55,7 +55,7 @@ lsio_subsys: bus@5d000000 { <&pwm2_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; @@ -68,7 +68,7 @@ lsio_subsys: bus@5d000000 { <&pwm3_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index c9a610ba4836..1264da6012f9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -2072,6 +2072,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; @@ -2114,6 +2115,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 4b1ce9fc1758..c6dc3ba0d43b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -1649,6 +1649,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg1>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; @@ -1680,6 +1681,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg2>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi index 01539df335f8..8439dd6b3935 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi @@ -96,6 +96,17 @@ status = "okay"; }; +&edma3 { + power-domains = <&pd IMX_SC_R_DMA_1_CH0>, + <&pd IMX_SC_R_DMA_1_CH1>, + <&pd IMX_SC_R_DMA_1_CH2>, + <&pd IMX_SC_R_DMA_1_CH3>, + <&pd IMX_SC_R_DMA_1_CH4>, + <&pd IMX_SC_R_DMA_1_CH5>, + <&pd IMX_SC_R_DMA_1_CH6>, + <&pd IMX_SC_R_DMA_1_CH7>; +}; + &flexcan1 { fsl,clk-source = /bits/ 8 <1>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index f22c1ac391c9..c4a0082f30d3 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -483,7 +483,7 @@ }; }; - gpioe: gpio@2d000080 { + gpioe: gpio@2d000000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2d000000 0x1000>; gpio-controller; @@ -498,7 +498,7 @@ gpio-ranges = <&iomuxc1 0 32 24>; }; - gpiof: gpio@2d010080 { + gpiof: gpio@2d010000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2d010000 0x1000>; gpio-controller; @@ -534,7 +534,7 @@ }; }; - gpiod: gpio@2e200080 { + gpiod: gpio@2e200000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2e200000 0x1000>; gpio-controller; diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts index f06139bdff97..3c5c67ebee5d 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts @@ -577,7 +577,7 @@ fsl,pins = < MX93_PAD_UART2_TXD__LPUART2_TX 0x31e MX93_PAD_UART2_RXD__LPUART2_RX 0x31e - MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x31e + MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x51e >; }; diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index ceccf4766440..34c0540276d1 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -417,7 +417,7 @@ compatible = "fsl,imx93-src-slice"; reg = <0x44462400 0x400>, <0x44465800 0x400>; #power-domain-cells = <0>; - clocks = <&clk IMX93_CLK_MEDIA_AXI>, + clocks = <&clk IMX93_CLK_NIC_MEDIA_GATE>, <&clk IMX93_CLK_MEDIA_APB>; }; }; @@ -957,7 +957,7 @@ }; }; - gpio2: gpio@43810080 { + gpio2: gpio@43810000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43810000 0x1000>; gpio-controller; @@ -972,7 +972,7 @@ gpio-ranges = <&iomuxc 0 4 30>; }; - gpio3: gpio@43820080 { + gpio3: gpio@43820000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43820000 0x1000>; gpio-controller; @@ -988,7 +988,7 @@ <&iomuxc 26 34 2>, <&iomuxc 28 0 4>; }; - gpio4: gpio@43830080 { + gpio4: gpio@43830000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43830000 0x1000>; gpio-controller; @@ -1003,7 +1003,7 @@ gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>; }; - gpio1: gpio@47400080 { + gpio1: gpio@47400000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x47400000 0x1000>; gpio-controller; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 3b7a176b7904..c46682150e50 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -73,7 +73,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index a885a3fbe456..2dc1bdc74e21 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -55,7 +55,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x20000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index af4a4309bda4..b876e501216b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -126,6 +126,7 @@ compatible = "sff,sfp"; i2c-bus = <&i2c_sfp1>; los-gpios = <&pio 46 GPIO_ACTIVE_HIGH>; + maximum-power-milliwatt = <3000>; mod-def0-gpios = <&pio 49 GPIO_ACTIVE_LOW>; tx-disable-gpios = <&pio 20 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 7 GPIO_ACTIVE_HIGH>; @@ -137,6 +138,7 @@ i2c-bus = <&i2c_sfp2>; los-gpios = <&pio 31 GPIO_ACTIVE_HIGH>; mod-def0-gpios = <&pio 47 GPIO_ACTIVE_LOW>; + maximum-power-milliwatt = <3000>; tx-disable-gpios = <&pio 15 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 48 GPIO_ACTIVE_HIGH>; }; @@ -150,16 +152,16 @@ trip = <&cpu_trip_active_high>; }; - cpu-active-low { + cpu-active-med { /* active: set fan to cooling level 1 */ cooling-device = <&fan 1 1>; - trip = <&cpu_trip_active_low>; + trip = <&cpu_trip_active_med>; }; - cpu-passive { - /* passive: set fan to cooling level 0 */ + cpu-active-low { + /* active: set fan to cooling level 0 */ cooling-device = <&fan 0 0>; - trip = <&cpu_trip_passive>; + trip = <&cpu_trip_active_low>; }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 24eda00e320d..fc751e049953 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -374,6 +374,10 @@ reg = <0 0x11230000 0 0x1000>, <0 0x11c20000 0 0x1000>; interrupts = <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>; + assigned-clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, + <&topckgen CLK_TOP_EMMC_250M_SEL>; + assigned-clock-parents = <&apmixedsys CLK_APMIXED_MPLL>, + <&topckgen CLK_TOP_NET1PLL_D5_D2>; clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, <&infracfg CLK_INFRA_MSDC_HCK_CK>, <&infracfg CLK_INFRA_MSDC_CK>, @@ -610,22 +614,34 @@ thermal-sensors = <&thermal 0>; trips { + cpu_trip_crit: crit { + temperature = <125000>; + hysteresis = <2000>; + type = "critical"; + }; + + cpu_trip_hot: hot { + temperature = <120000>; + hysteresis = <2000>; + type = "hot"; + }; + cpu_trip_active_high: active-high { temperature = <115000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_active_low: active-low { + cpu_trip_active_med: active-med { temperature = <85000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_passive: passive { - temperature = <40000>; + cpu_trip_active_low: active-low { + temperature = <60000>; hysteresis = <2000>; - type = "passive"; + type = "active"; }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 5122963d8743..d258c80213b2 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -44,7 +44,7 @@ id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>; }; - usb_p1_vbus: regulator@0 { + usb_p1_vbus: regulator-usb-p1 { compatible = "regulator-fixed"; regulator-name = "usb_vbus"; regulator-min-microvolt = <5000000>; @@ -53,7 +53,7 @@ enable-active-high; }; - usb_p0_vbus: regulator@1 { + usb_p0_vbus: regulator-usb-p0 { compatible = "regulator-fixed"; regulator-name = "vbus"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index ce336a48c897..77f9ab94c00b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -31,14 +31,14 @@ #address-cells = <2>; #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; }; }; - ntc@0 { + thermal-sensor { compatible = "murata,ncp03wf104"; pullup-uv = <1800000>; pullup-ohm = <390000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index bf97b60ae4d1..820260348de9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -91,6 +91,8 @@ &dsi0 { status = "okay"; + /delete-property/#size-cells; + /delete-property/#address-cells; /delete-node/panel@0; ports { port { @@ -441,20 +443,20 @@ }; touchscreen_pins: touchscreen-pins { - touch_int_odl { + touch-int-odl { pinmux = <PINMUX_GPIO155__FUNC_GPIO155>; input-enable; bias-pull-up; }; - touch_rst_l { + touch-rst-l { pinmux = <PINMUX_GPIO156__FUNC_GPIO156>; output-high; }; }; trackpad_pins: trackpad-pins { - trackpad_int { + trackpad-int { pinmux = <PINMUX_GPIO7__FUNC_GPIO7>; input-enable; bias-disable; /* pulled externally */ diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index bf7de35ffcbc..7881a27be029 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -116,7 +116,7 @@ #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; @@ -460,7 +460,7 @@ &pio { aud_pins_default: audiopins { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO97__FUNC_I2S2_MCK>, <PINMUX_GPIO98__FUNC_I2S2_BCK>, <PINMUX_GPIO101__FUNC_I2S2_LRCK>, @@ -482,7 +482,7 @@ }; aud_pins_tdm_out_on: audiotdmouton { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO169__FUNC_TDM_BCK_2ND>, <PINMUX_GPIO170__FUNC_TDM_LRCK_2ND>, <PINMUX_GPIO171__FUNC_TDM_DATA0_2ND>, @@ -494,7 +494,7 @@ }; aud_pins_tdm_out_off: audiotdmoutoff { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO169__FUNC_GPIO169>, <PINMUX_GPIO170__FUNC_GPIO170>, <PINMUX_GPIO171__FUNC_GPIO171>, @@ -508,13 +508,13 @@ }; bt_pins: bt-pins { - pins_bt_en { + pins-bt-en { pinmux = <PINMUX_GPIO120__FUNC_GPIO120>; output-low; }; }; - ec_ap_int_odl: ec_ap_int_odl { + ec_ap_int_odl: ec-ap-int-odl { pins1 { pinmux = <PINMUX_GPIO151__FUNC_GPIO151>; input-enable; @@ -522,7 +522,7 @@ }; }; - h1_int_od_l: h1_int_od_l { + h1_int_od_l: h1-int-od-l { pins1 { pinmux = <PINMUX_GPIO153__FUNC_GPIO153>; input-enable; @@ -530,7 +530,7 @@ }; i2c0_pins: i2c0 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO82__FUNC_SDA0>, <PINMUX_GPIO83__FUNC_SCL0>; mediatek,pull-up-adv = <3>; @@ -539,7 +539,7 @@ }; i2c1_pins: i2c1 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO81__FUNC_SDA1>, <PINMUX_GPIO84__FUNC_SCL1>; mediatek,pull-up-adv = <3>; @@ -548,7 +548,7 @@ }; i2c2_pins: i2c2 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO103__FUNC_SCL2>, <PINMUX_GPIO104__FUNC_SDA2>; bias-disable; @@ -557,7 +557,7 @@ }; i2c3_pins: i2c3 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO50__FUNC_SCL3>, <PINMUX_GPIO51__FUNC_SDA3>; mediatek,pull-up-adv = <3>; @@ -566,7 +566,7 @@ }; i2c4_pins: i2c4 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO105__FUNC_SCL4>, <PINMUX_GPIO106__FUNC_SDA4>; bias-disable; @@ -575,7 +575,7 @@ }; i2c5_pins: i2c5 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO48__FUNC_SCL5>, <PINMUX_GPIO49__FUNC_SDA5>; mediatek,pull-up-adv = <3>; @@ -584,7 +584,7 @@ }; i2c6_pins: i2c6 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO11__FUNC_SCL6>, <PINMUX_GPIO12__FUNC_SDA6>; bias-disable; @@ -592,7 +592,7 @@ }; mmc0_pins_default: mmc0-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>, <PINMUX_GPIO128__FUNC_MSDC0_DAT1>, <PINMUX_GPIO125__FUNC_MSDC0_DAT2>, @@ -607,13 +607,13 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-down-adv = <01>; @@ -621,7 +621,7 @@ }; mmc0_pins_uhs: mmc0-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>, <PINMUX_GPIO128__FUNC_MSDC0_DAT1>, <PINMUX_GPIO125__FUNC_MSDC0_DAT2>, @@ -636,19 +636,19 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-down-adv = <10>; }; - pins_ds { + pins-ds { pinmux = <PINMUX_GPIO131__FUNC_MSDC0_DSL>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-up-adv = <01>; @@ -656,7 +656,7 @@ }; mmc1_pins_default: mmc1-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>, <PINMUX_GPIO32__FUNC_MSDC1_DAT0>, <PINMUX_GPIO34__FUNC_MSDC1_DAT1>, @@ -666,7 +666,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>; input-enable; mediatek,pull-down-adv = <10>; @@ -674,7 +674,7 @@ }; mmc1_pins_uhs: mmc1-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>, <PINMUX_GPIO32__FUNC_MSDC1_DAT0>, <PINMUX_GPIO34__FUNC_MSDC1_DAT1>, @@ -685,7 +685,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>; drive-strength = <MTK_DRIVE_8mA>; mediatek,pull-down-adv = <10>; @@ -693,15 +693,15 @@ }; }; - panel_pins_default: panel_pins_default { - panel_reset { + panel_pins_default: panel-pins-default { + panel-reset { pinmux = <PINMUX_GPIO45__FUNC_GPIO45>; output-low; bias-pull-up; }; }; - pwm0_pin_default: pwm0_pin_default { + pwm0_pin_default: pwm0-pin-default { pins1 { pinmux = <PINMUX_GPIO176__FUNC_GPIO176>; output-high; @@ -713,14 +713,14 @@ }; scp_pins: scp { - pins_scp_uart { + pins-scp-uart { pinmux = <PINMUX_GPIO110__FUNC_TP_URXD1_AO>, <PINMUX_GPIO112__FUNC_TP_UTXD1_AO>; }; }; spi0_pins: spi0 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO85__FUNC_SPI0_MI>, <PINMUX_GPIO86__FUNC_GPIO86>, <PINMUX_GPIO87__FUNC_SPI0_MO>, @@ -730,7 +730,7 @@ }; spi1_pins: spi1 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO161__FUNC_SPI1_A_MI>, <PINMUX_GPIO162__FUNC_SPI1_A_CSB>, <PINMUX_GPIO163__FUNC_SPI1_A_MO>, @@ -740,20 +740,20 @@ }; spi2_pins: spi2 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO0__FUNC_SPI2_CSB>, <PINMUX_GPIO1__FUNC_SPI2_MO>, <PINMUX_GPIO2__FUNC_SPI2_CLK>; bias-disable; }; - pins_spi_mi { + pins-spi-mi { pinmux = <PINMUX_GPIO94__FUNC_SPI2_MI>; mediatek,pull-down-adv = <00>; }; }; spi3_pins: spi3 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO21__FUNC_SPI3_MI>, <PINMUX_GPIO22__FUNC_SPI3_CSB>, <PINMUX_GPIO23__FUNC_SPI3_MO>, @@ -763,7 +763,7 @@ }; spi4_pins: spi4 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO17__FUNC_SPI4_MI>, <PINMUX_GPIO18__FUNC_SPI4_CSB>, <PINMUX_GPIO19__FUNC_SPI4_MO>, @@ -773,7 +773,7 @@ }; spi5_pins: spi5 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO13__FUNC_SPI5_MI>, <PINMUX_GPIO14__FUNC_SPI5_CSB>, <PINMUX_GPIO15__FUNC_SPI5_MO>, @@ -783,63 +783,63 @@ }; uart0_pins_default: uart0-pins-default { - pins_rx { + pins-rx { pinmux = <PINMUX_GPIO95__FUNC_URXD0>; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = <PINMUX_GPIO96__FUNC_UTXD0>; }; }; uart1_pins_default: uart1-pins-default { - pins_rx { + pins-rx { pinmux = <PINMUX_GPIO121__FUNC_URXD1>; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = <PINMUX_GPIO115__FUNC_UTXD1>; }; - pins_rts { + pins-rts { pinmux = <PINMUX_GPIO47__FUNC_URTS1>; output-enable; }; - pins_cts { + pins-cts { pinmux = <PINMUX_GPIO46__FUNC_UCTS1>; input-enable; }; }; uart1_pins_sleep: uart1-pins-sleep { - pins_rx { + pins-rx { pinmux = <PINMUX_GPIO121__FUNC_GPIO121>; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = <PINMUX_GPIO115__FUNC_UTXD1>; }; - pins_rts { + pins-rts { pinmux = <PINMUX_GPIO47__FUNC_URTS1>; output-enable; }; - pins_cts { + pins-cts { pinmux = <PINMUX_GPIO46__FUNC_UCTS1>; input-enable; }; }; wifi_pins_pwrseq: wifi-pins-pwrseq { - pins_wifi_enable { + pins-wifi-enable { pinmux = <PINMUX_GPIO119__FUNC_GPIO119>; output-low; }; }; wifi_pins_wakeup: wifi-pins-wakeup { - pins_wifi_wakeup { + pins-wifi-wakeup { pinmux = <PINMUX_GPIO113__FUNC_GPIO113>; input-enable; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 5169779d01df..976dc968b3ca 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1210,127 +1210,6 @@ nvmem-cell-names = "calibration-data"; }; - thermal_zones: thermal-zones { - cpu_thermal: cpu-thermal { - polling-delay-passive = <100>; - polling-delay = <500>; - thermal-sensors = <&thermal 0>; - sustainable-power = <5000>; - - trips { - threshold: trip-point0 { - temperature = <68000>; - hysteresis = <2000>; - type = "passive"; - }; - - target: trip-point1 { - temperature = <80000>; - hysteresis = <2000>; - type = "passive"; - }; - - cpu_crit: cpu-crit { - temperature = <115000>; - hysteresis = <2000>; - type = "critical"; - }; - }; - - cooling-maps { - map0 { - trip = <&target>; - cooling-device = <&cpu0 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu1 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu2 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu3 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <3072>; - }; - map1 { - trip = <&target>; - cooling-device = <&cpu4 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu5 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu6 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu7 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <1024>; - }; - }; - }; - - /* The tzts1 ~ tzts6 don't need to polling */ - /* The tzts1 ~ tzts6 don't need to thermal throttle */ - - tzts1: tzts1 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 1>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts2: tzts2 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 2>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts3: tzts3 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 3>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts4: tzts4 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 4>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts5: tzts5 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 5>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tztsABB: tztsABB { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 6>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - }; - pwm0: pwm@1100e000 { compatible = "mediatek,mt8183-disp-pwm"; reg = <0 0x1100e000 0 0x1000>; @@ -2105,4 +1984,125 @@ power-domains = <&spm MT8183_POWER_DOMAIN_CAM>; }; }; + + thermal_zones: thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <100>; + polling-delay = <500>; + thermal-sensors = <&thermal 0>; + sustainable-power = <5000>; + + trips { + threshold: trip-point0 { + temperature = <68000>; + hysteresis = <2000>; + type = "passive"; + }; + + target: trip-point1 { + temperature = <80000>; + hysteresis = <2000>; + type = "passive"; + }; + + cpu_crit: cpu-crit { + temperature = <115000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&target>; + cooling-device = <&cpu0 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu1 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu2 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu3 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <3072>; + }; + map1 { + trip = <&target>; + cooling-device = <&cpu4 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu5 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu6 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu7 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <1024>; + }; + }; + }; + + /* The tzts1 ~ tzts6 don't need to polling */ + /* The tzts1 ~ tzts6 don't need to thermal throttle */ + + tzts1: tzts1 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 1>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts2: tzts2 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 2>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts3: tzts3 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 3>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts4: tzts4 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 4>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts5: tzts5 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 5>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tztsABB: tztsABB { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 6>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index f04ae70c470a..df0c04f2ba1d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -924,7 +924,8 @@ reg = <MT8186_POWER_DOMAIN_CSIRX_TOP>; clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>; - clock-names = "csirx_top0", "csirx_top1"; + clock-names = "subsys-csirx-top0", + "subsys-csirx-top1"; #power-domain-cells = <0>; }; @@ -942,7 +943,8 @@ reg = <MT8186_POWER_DOMAIN_ADSP_AO>; clocks = <&topckgen CLK_TOP_AUDIODSP>, <&topckgen CLK_TOP_ADSP_BUS>; - clock-names = "audioadsp", "adsp_bus"; + clock-names = "audioadsp", + "subsys-adsp-bus"; #address-cells = <1>; #size-cells = <0>; #power-domain-cells = <1>; @@ -975,8 +977,11 @@ <&mmsys CLK_MM_SMI_COMMON>, <&mmsys CLK_MM_SMI_GALS>, <&mmsys CLK_MM_SMI_IOMMU>; - clock-names = "disp", "mdp", "smi_infra", "smi_common", - "smi_gals", "smi_iommu"; + clock-names = "disp", "mdp", + "subsys-smi-infra", + "subsys-smi-common", + "subsys-smi-gals", + "subsys-smi-iommu"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -993,15 +998,17 @@ power-domain@MT8186_POWER_DOMAIN_CAM { reg = <MT8186_POWER_DOMAIN_CAM>; - clocks = <&topckgen CLK_TOP_CAM>, - <&topckgen CLK_TOP_SENINF>, + clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>, <&topckgen CLK_TOP_SENINF2>, <&topckgen CLK_TOP_SENINF3>, + <&camsys CLK_CAM2MM_GALS>, <&topckgen CLK_TOP_CAMTM>, - <&camsys CLK_CAM2MM_GALS>; - clock-names = "cam-top", "cam0", "cam1", "cam2", - "cam3", "cam-tm", "gals"; + <&topckgen CLK_TOP_CAM>; + clock-names = "cam0", "cam1", "cam2", + "cam3", "gals", + "subsys-cam-tm", + "subsys-cam-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1020,9 +1027,9 @@ power-domain@MT8186_POWER_DOMAIN_IMG { reg = <MT8186_POWER_DOMAIN_IMG>; - clocks = <&topckgen CLK_TOP_IMG1>, - <&imgsys1 CLK_IMG1_GALS_IMG1>; - clock-names = "img-top", "gals"; + clocks = <&imgsys1 CLK_IMG1_GALS_IMG1>, + <&topckgen CLK_TOP_IMG1>; + clock-names = "gals", "subsys-img-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1041,8 +1048,11 @@ <&ipesys CLK_IPE_LARB20>, <&ipesys CLK_IPE_SMI_SUBCOM>, <&ipesys CLK_IPE_GALS_IPE>; - clock-names = "ipe-top", "ipe-larb0", "ipe-larb1", - "ipe-smi", "ipe-gals"; + clock-names = "subsys-ipe-top", + "subsys-ipe-larb0", + "subsys-ipe-larb1", + "subsys-ipe-smi", + "subsys-ipe-gals"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -1061,7 +1071,9 @@ clocks = <&topckgen CLK_TOP_WPE>, <&wpesys CLK_WPE_SMI_LARB8_CK_EN>, <&wpesys CLK_WPE_SMI_LARB8_PCLK_EN>; - clock-names = "wpe0", "larb-ck", "larb-pclk"; + clock-names = "wpe0", + "subsys-larb-ck", + "subsys-larb-pclk"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -1656,7 +1668,7 @@ #address-cells = <1>; #size-cells = <1>; - gpu_speedbin: gpu-speed-bin@59c { + gpu_speedbin: gpu-speedbin@59c { reg = <0x59c 0x4>; bits = <0 3>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index dd5b89b73190..5a7cab489ff3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -389,7 +389,7 @@ pinctrl-0 = <&i2c7_pins>; pmic@34 { - #interrupt-cells = <1>; + #interrupt-cells = <2>; compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 54c674c45b49..e0ac2e9f5b72 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -627,6 +627,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { reg = <MT8195_POWER_DOMAIN_VENC_CORE1>; + clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; + clock-names = "venc1-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -689,6 +691,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC { reg = <MT8195_POWER_DOMAIN_VENC>; + clocks = <&vencsys CLK_VENC_LARB>; + clock-names = "venc0-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -2665,7 +2669,7 @@ reg = <0 0x1b010000 0 0x1000>; mediatek,larb-id = <20>; mediatek,smi = <&smi_common_vpp>; - clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>, + clocks = <&vencsys_core1 CLK_VENC_CORE1_VENC>, <&vencsys_core1 CLK_VENC_CORE1_GALS>, <&vppsys0 CLK_VPP0_GALS_VDO0_VDO1_VENCSYS_CORE1>; clock-names = "apb", "smi", "gals"; diff --git a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts index 70b465f7c6a7..00ac59a873e8 100644 --- a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts +++ b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts @@ -238,6 +238,7 @@ mt6360: pmic@34 { compatible = "mediatek,mt6360"; reg = <0x34>; + interrupt-parent = <&pio>; interrupts = <128 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; interrupt-controller; diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index de0a1f2af983..7d4c5324c61b 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -86,7 +86,7 @@ sgtl5000_clk: sgtl5000-oscillator { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <24576000>; + clock-frequency = <24576000>; }; dc_12v: dc-12v-regulator { diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index e729e7a22b23..cc8209795c3e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -668,7 +668,7 @@ vdec: video-codec@ff360000 { compatible = "rockchip,rk3328-vdec", "rockchip,rk3399-vdec"; - reg = <0x0 0xff360000 0x0 0x400>; + reg = <0x0 0xff360000 0x0 0x480>; interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>, <&cru SCLK_VDEC_CABAC>, <&cru SCLK_VDEC_CORE>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi index 5c1929d41cc0..cacbad35cfc8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi @@ -509,8 +509,7 @@ ap_i2c_tp: &i2c5 { &pci_rootport { mvl_wifi: wifi@0,0 { compatible = "pci1b4b,2b42"; - reg = <0x83010000 0x0 0x00000000 0x0 0x00100000 - 0x83010000 0x0 0x00100000 0x0 0x00100000>; + reg = <0x0000 0x0 0x0 0x0 0x0>; interrupt-parent = <&gpio0>; interrupts = <8 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts index 853e88455e75..9e4b12ed62cb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts @@ -34,8 +34,8 @@ &pci_rootport { wifi@0,0 { compatible = "qcom,ath10k"; - reg = <0x00010000 0x0 0x00000000 0x0 0x00000000>, - <0x03010010 0x0 0x00000000 0x0 0x00200000>; + reg = <0x00000000 0x0 0x00000000 0x0 0x00000000>, + <0x03000010 0x0 0x00000000 0x0 0x00200000>; qcom,ath10k-calibration-variant = "GO_DUMO"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index c9bf1d5c3a42..789fd0dcc88b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -489,6 +489,7 @@ ap_i2c_audio: &i2c8 { #address-cells = <3>; #size-cells = <2>; ranges; + device_type = "pci"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index faf02e59d6c7..da0dfb237f85 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1109,7 +1109,9 @@ power-domain@RK3399_PD_VDU { reg = <RK3399_PD_VDU>; clocks = <&cru ACLK_VDU>, - <&cru HCLK_VDU>; + <&cru HCLK_VDU>, + <&cru SCLK_VDU_CA>, + <&cru SCLK_VDU_CORE>; pm_qos = <&qos_video_m1_r>, <&qos_video_m1_w>; #power-domain-cells = <0>; @@ -1384,7 +1386,7 @@ vdec: video-codec@ff660000 { compatible = "rockchip,rk3399-vdec"; - reg = <0x0 0xff660000 0x0 0x400>; + reg = <0x0 0xff660000 0x0 0x480>; interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH 0>; clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>, <&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index 0964761e3ce9..c19c0f1b3778 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -977,7 +977,7 @@ <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "sys", "pmc", "msi", "legacy", "err"; + interrupt-names = "sys", "pmc", "msg", "legacy", "err"; bus-range = <0x0 0xf>; clocks = <&cru ACLK_PCIE20_MST>, <&cru ACLK_PCIE20_SLV>, <&cru ACLK_PCIE20_DBI>, <&cru PCLK_PCIE20>, diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index 9570b34aca2e..d88c0e852356 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -235,13 +235,13 @@ &pinctrl { fan { fan_int: fan-int { - rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>; }; }; hym8563 { hym8563_int: hym8563-int { - rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts index 8f399c4317bd..e3a839a12dc6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts @@ -38,7 +38,7 @@ leds { compatible = "gpio-leds"; pinctrl-names = "default"; - pinctrl-0 =<&leds_gpio>; + pinctrl-0 = <&leds_gpio>; led-1 { gpios = <&gpio1 RK_PA2 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi index 63151d9d2377..30db12c4fc82 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi @@ -369,7 +369,7 @@ emmc_data_strobe: emmc-data-strobe { rockchip,pins = /* emmc_data_strobe */ - <2 RK_PA2 1 &pcfg_pull_none>; + <2 RK_PA2 1 &pcfg_pull_down>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 7064c0e9179f..8aa0499f9b03 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -1362,7 +1362,6 @@ <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH 0>, <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH 0>, <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>; - interrupt-names = "ch0", "ch1", "ch2", "ch3"; rockchip,pmu = <&pmu1grf>; }; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f6d416fe49b0..acf109581ac0 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -819,6 +819,11 @@ static inline bool system_supports_tlb_range(void) return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE); } +static inline bool system_supports_lpa2(void) +{ + return cpus_have_final_cap(ARM64_HAS_LPA2); +} + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index ae35939f395b..353fe08546cf 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr) return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; } +static inline bool esr_fsc_is_translation_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT; +} + +static inline bool esr_fsc_is_permission_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM; +} + +static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) +{ + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS; +} + const char *esr_get_class_string(unsigned long esr); #endif /* __ASSEMBLY */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b85f46a73e21..3c6f8ba1e479 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -108,6 +108,7 @@ #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) /* TCR_EL2 Registers bits */ +#define TCR_EL2_DS (1UL << 32) #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 @@ -122,6 +123,7 @@ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) /* VTCR_EL2 Registers bits */ +#define VTCR_EL2_DS TCR_EL2_DS #define VTCR_EL2_RES1 (1U << 31) #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) @@ -344,36 +346,47 @@ * Once we get to a point where the two describe the same thing, we'll * merge the definitions. One day. */ -#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) +#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0 #define __HFGRTR_EL2_MASK GENMASK(49, 0) -#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) +#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK) -#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ - BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ - GENMASK(26, 25) | BIT(21) | BIT(18) | \ +/* + * The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any + * future additions, define __HFGWTR* macros relative to __HFGRTR* ones. + */ +#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ + GENMASK(26, 25) | BIT(21) | BIT(18) | \ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) -#define __HFGWTR_EL2_MASK GENMASK(49, 0) -#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) - -#define __HFGITR_EL2_RES0 GENMASK(63, 57) -#define __HFGITR_EL2_MASK GENMASK(54, 0) -#define __HFGITR_EL2_nMASK GENMASK(56, 55) - -#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \ - GENMASK(21, 20) | BIT(8)) -#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK -#define __HDFGRTR_EL2_nMASK GENMASK(62, 59) - -#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \ - BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \ - BIT(22) | BIT(9) | BIT(6)) -#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK -#define __HDFGWTR_EL2_nMASK GENMASK(62, 60) +#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK) +#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK) +#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK) + +#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0 +#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0)) +#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK) + +#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0 +#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \ + GENMASK(41, 40) | GENMASK(37, 22) | \ + GENMASK(19, 9) | GENMASK(7, 0)) +#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK) + +#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0 +#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \ + GENMASK(46, 44) | GENMASK(42, 41) | \ + GENMASK(37, 35) | GENMASK(33, 31) | \ + GENMASK(29, 23) | GENMASK(21, 10) | \ + GENMASK(8, 7) | GENMASK(5, 0)) +#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK) + +#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0 +#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0)) +#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK) /* Similar definitions for HCRX_EL2 */ -#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12)) -#define __HCRX_EL2_MASK (0) -#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0)) +#define __HCRX_EL2_RES0 HCRX_EL2_RES0 +#define __HCRX_EL2_MASK (BIT(6)) +#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 78a550537b67..b804fe832184 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -17,6 +17,7 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_nested.h> #include <asm/ptrace.h> #include <asm/cputype.h> #include <asm/virt.h> @@ -54,11 +55,6 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); -static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature) -{ - return test_bit(feature, vcpu->kvm->arch.vcpu_features); -} - #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -248,7 +244,7 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt) static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) { - return __is_hyp_ctxt(&vcpu->arch.ctxt); + return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt); } /* @@ -404,14 +400,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; } -static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) +static inline +bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; + return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu)); } -static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu) +static inline +bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL; + return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu)); +} + +static inline +u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu) +{ + unsigned long esr = kvm_vcpu_get_esr(vcpu); + + BUG_ON(!esr_fsc_is_permission_fault(esr)); + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL)); } static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) @@ -454,12 +461,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) * first), then a permission fault to allow the flags * to be set. */ - switch (kvm_vcpu_trap_get_fault_type(vcpu)) { - case ESR_ELx_FSC_PERM: - return true; - default: - return false; - } + return kvm_vcpu_trap_is_permission_fault(vcpu); } if (kvm_vcpu_trap_is_iabt(vcpu)) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 567bbc417e4b..21c57b812569 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -27,6 +27,7 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> +#include <asm/vncr_mapping.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -306,6 +307,7 @@ struct kvm_arch { * Atomic access to multiple idregs are guarded by kvm_arch.config_lock. */ #define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id)) +#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask) #define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)]) #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; @@ -324,33 +326,33 @@ struct kvm_vcpu_fault_info { u64 disr_el1; /* Deferred [SError] Status Register */ }; +/* + * VNCR() just places the VNCR_capable registers in the enum after + * __VNCR_START__, and the value (after correction) to be an 8-byte offset + * from the VNCR base. As we don't require the enum to be otherwise ordered, + * we need the terrible hack below to ensure that we correctly size the + * sys_regs array, no matter what. + * + * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful + * treasure trove of bit hacks: + * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + */ +#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y)))) +#define VNCR(r) \ + __before_##r, \ + r = __VNCR_START__ + ((VNCR_ ## r) / 8), \ + __after_##r = __MAX__(__before_##r - 1, r) + enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ CLIDR_EL1, /* Cache Level ID Register */ CSSELR_EL1, /* Cache Size Selection Register */ - SCTLR_EL1, /* System Control Register */ - ACTLR_EL1, /* Auxiliary Control Register */ - CPACR_EL1, /* Coprocessor Access Control */ - ZCR_EL1, /* SVE Control */ - TTBR0_EL1, /* Translation Table Base Register 0 */ - TTBR1_EL1, /* Translation Table Base Register 1 */ - TCR_EL1, /* Translation Control Register */ - TCR2_EL1, /* Extended Translation Control Register */ - ESR_EL1, /* Exception Syndrome Register */ - AFSR0_EL1, /* Auxiliary Fault Status Register 0 */ - AFSR1_EL1, /* Auxiliary Fault Status Register 1 */ - FAR_EL1, /* Fault Address Register */ - MAIR_EL1, /* Memory Attribute Indirection Register */ - VBAR_EL1, /* Vector Base Address Register */ - CONTEXTIDR_EL1, /* Context ID Register */ TPIDR_EL0, /* Thread ID, User R/W */ TPIDRRO_EL0, /* Thread ID, User R/O */ TPIDR_EL1, /* Thread ID, Privileged */ - AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ CNTKCTL_EL1, /* Timer Control Register (EL1) */ PAR_EL1, /* Physical Address Register */ - MDSCR_EL1, /* Monitor Debug System Control Register */ MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ OSLSR_EL1, /* OS Lock Status Register */ DISR_EL1, /* Deferred Interrupt Status Register */ @@ -381,26 +383,11 @@ enum vcpu_sysreg { APGAKEYLO_EL1, APGAKEYHI_EL1, - ELR_EL1, - SP_EL1, - SPSR_EL1, - - CNTVOFF_EL2, - CNTV_CVAL_EL0, - CNTV_CTL_EL0, - CNTP_CVAL_EL0, - CNTP_CTL_EL0, - /* Memory Tagging Extension registers */ RGSR_EL1, /* Random Allocation Tag Seed Register */ GCR_EL1, /* Tag Control Register */ - TFSR_EL1, /* Tag Fault Status Register (EL1) */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ - /* Permission Indirection Extension registers */ - PIR_EL1, /* Permission Indirection Register 1 (EL1) */ - PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */ - /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -408,21 +395,14 @@ enum vcpu_sysreg { DBGVCR32_EL2, /* Debug Vector Catch Register */ /* EL2 registers */ - VPIDR_EL2, /* Virtualization Processor ID Register */ - VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */ SCTLR_EL2, /* System Control Register (EL2) */ ACTLR_EL2, /* Auxiliary Control Register (EL2) */ - HCR_EL2, /* Hypervisor Configuration Register */ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ - HSTR_EL2, /* Hypervisor System Trap Register */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ - HCRX_EL2, /* Extended Hypervisor Configuration Register */ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ - VTTBR_EL2, /* Virtualization Translation Table Base Register */ - VTCR_EL2, /* Virtualization Translation Control Register */ SPSR_EL2, /* EL2 saved program status register */ ELR_EL2, /* EL2 exception link register */ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */ @@ -435,19 +415,62 @@ enum vcpu_sysreg { VBAR_EL2, /* Vector Base Address Register (EL2) */ RVBAR_EL2, /* Reset Vector Base Address Register */ CONTEXTIDR_EL2, /* Context ID Register (EL2) */ - TPIDR_EL2, /* EL2 Software Thread ID Register */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ - HFGRTR_EL2, - HFGWTR_EL2, - HFGITR_EL2, - HDFGRTR_EL2, - HDFGWTR_EL2, CNTHP_CTL_EL2, CNTHP_CVAL_EL2, CNTHV_CTL_EL2, CNTHV_CVAL_EL2, + __VNCR_START__, /* Any VNCR-capable reg goes after this point */ + + VNCR(SCTLR_EL1),/* System Control Register */ + VNCR(ACTLR_EL1),/* Auxiliary Control Register */ + VNCR(CPACR_EL1),/* Coprocessor Access Control */ + VNCR(ZCR_EL1), /* SVE Control */ + VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */ + VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */ + VNCR(TCR_EL1), /* Translation Control Register */ + VNCR(TCR2_EL1), /* Extended Translation Control Register */ + VNCR(ESR_EL1), /* Exception Syndrome Register */ + VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */ + VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */ + VNCR(FAR_EL1), /* Fault Address Register */ + VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */ + VNCR(VBAR_EL1), /* Vector Base Address Register */ + VNCR(CONTEXTIDR_EL1), /* Context ID Register */ + VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */ + VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */ + VNCR(ELR_EL1), + VNCR(SP_EL1), + VNCR(SPSR_EL1), + VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */ + VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */ + VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */ + VNCR(HCR_EL2), /* Hypervisor Configuration Register */ + VNCR(HSTR_EL2), /* Hypervisor System Trap Register */ + VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */ + VNCR(VTCR_EL2), /* Virtualization Translation Control Register */ + VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */ + VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */ + + /* Permission Indirection Extension registers */ + VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */ + VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */ + + VNCR(HFGRTR_EL2), + VNCR(HFGWTR_EL2), + VNCR(HFGITR_EL2), + VNCR(HDFGRTR_EL2), + VNCR(HDFGWTR_EL2), + VNCR(HAFGRTR_EL2), + + VNCR(CNTVOFF_EL2), + VNCR(CNTV_CVAL_EL0), + VNCR(CNTV_CTL_EL0), + VNCR(CNTP_CVAL_EL0), + VNCR(CNTP_CTL_EL0), + NR_SYS_REGS /* Nothing after this line! */ }; @@ -464,6 +487,9 @@ struct kvm_cpu_context { u64 sys_regs[NR_SYS_REGS]; struct kvm_vcpu *__hyp_running_vcpu; + + /* This pointer has to be 4kB aligned. */ + u64 *vncr_array; }; struct kvm_host_data { @@ -826,8 +852,19 @@ struct kvm_vcpu_arch { * accessed by a running VCPU. For example, for userspace access or * for system registers that are never context switched, but only * emulated. + * + * Don't bother with VNCR-based accesses in the nVHE code, it has no + * business dealing with NV. */ -#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) +static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) +{ +#if !defined (__KVM_NVHE_HYPERVISOR__) + if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && + r >= __VNCR_START__ && ctxt->vncr_array)) + return &ctxt->vncr_array[r - __VNCR_START__]; +#endif + return (u64 *)&ctxt->sys_regs[r]; +} #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) @@ -871,6 +908,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; + case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break; case PAR_EL1: *val = read_sysreg_par(); break; case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; @@ -915,6 +953,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; @@ -1175,6 +1214,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_vm_has_ran_once(kvm) \ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags)) +static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) +{ + return test_bit(feature, ka->vcpu_features); +} + +#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 6cec8e9c6c91..4882905357f4 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -2,8 +2,9 @@ #ifndef __ARM64_KVM_NESTED_H #define __ARM64_KVM_NESTED_H -#include <asm/kvm_emulate.h> +#include <linux/bitfield.h> #include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) { @@ -12,12 +13,55 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2)); } -extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); +/* Translation helpers from non-VHE EL2 to EL1 */ +static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2) +{ + return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT; +} + +static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr) +{ + return TCR_EPD1_MASK | /* disable TTBR1_EL1 */ + ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) | + tcr_el2_ps_to_tcr_el1_ips(tcr) | + (tcr & TCR_EL2_TG0_MASK) | + (tcr & TCR_EL2_ORGN0_MASK) | + (tcr & TCR_EL2_IRGN0_MASK) | + (tcr & TCR_EL2_T0SZ_MASK); +} + +static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2) +{ + u64 cpacr_el1 = 0; + + if (cptr_el2 & CPTR_EL2_TTA) + cpacr_el1 |= CPACR_ELx_TTA; + if (!(cptr_el2 & CPTR_EL2_TFP)) + cpacr_el1 |= CPACR_ELx_FPEN; + if (!(cptr_el2 & CPTR_EL2_TZ)) + cpacr_el1 |= CPACR_ELx_ZEN; -struct sys_reg_params; -struct sys_reg_desc; + return cpacr_el1; +} + +static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val) +{ + /* Only preserve the minimal set of bits we support */ + val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA | + SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE); + val |= SCTLR_EL1_RES1; + + return val; +} + +static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) +{ + /* Clear the ASID field */ + return ttbr0 & ~GENMASK_ULL(63, 48); +} + +extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); -void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, - const struct sys_reg_desc *r); +int kvm_init_nv_sysregs(struct kvm *kvm); #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..cfdf40f734b1 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -11,7 +11,8 @@ #include <linux/kvm_host.h> #include <linux/types.h> -#define KVM_PGTABLE_MAX_LEVELS 4U +#define KVM_PGTABLE_FIRST_LEVEL -1 +#define KVM_PGTABLE_LAST_LEVEL 3 /* * The largest supported block sizes for KVM (no 52-bit PA support): @@ -20,17 +21,29 @@ * - 64K (level 2): 512MB */ #ifdef CONFIG_ARM64_4K_PAGES -#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1U +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1 #else -#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U +#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2 #endif +#define kvm_lpa2_is_enabled() system_supports_lpa2() + +static inline u64 kvm_get_parange_max(void) +{ + if (kvm_lpa2_is_enabled() || + (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16)) + return ID_AA64MMFR0_EL1_PARANGE_52; + else + return ID_AA64MMFR0_EL1_PARANGE_48; +} + static inline u64 kvm_get_parange(u64 mmfr0) { + u64 parange_max = kvm_get_parange_max(); u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); - if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX) - parange = ID_AA64MMFR0_EL1_PARANGE_MAX; + if (parange > parange_max) + parange = parange_max; return parange; } @@ -41,6 +54,8 @@ typedef u64 kvm_pte_t; #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) +#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT) +#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8) #define KVM_PHYS_INVALID (-1ULL) @@ -51,21 +66,34 @@ static inline bool kvm_pte_valid(kvm_pte_t pte) static inline u64 kvm_pte_to_phys(kvm_pte_t pte) { - u64 pa = pte & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + u64 pa; + + if (kvm_lpa2_is_enabled()) { + pa = pte & KVM_PTE_ADDR_MASK_LPA2; + pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50; + } else { + pa = pte & KVM_PTE_ADDR_MASK; + if (PAGE_SHIFT == 16) + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + } return pa; } static inline kvm_pte_t kvm_phys_to_pte(u64 pa) { - kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) { - pa &= GENMASK(51, 48); - pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + kvm_pte_t pte; + + if (kvm_lpa2_is_enabled()) { + pte = pa & KVM_PTE_ADDR_MASK_LPA2; + pa &= GENMASK(51, 50); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50); + } else { + pte = pa & KVM_PTE_ADDR_MASK; + if (PAGE_SHIFT == 16) { + pa &= GENMASK(51, 48); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + } } return pte; @@ -76,28 +104,28 @@ static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte) return __phys_to_pfn(kvm_pte_to_phys(pte)); } -static inline u64 kvm_granule_shift(u32 level) +static inline u64 kvm_granule_shift(s8 level) { - /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ + /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */ return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); } -static inline u64 kvm_granule_size(u32 level) +static inline u64 kvm_granule_size(s8 level) { return BIT(kvm_granule_shift(level)); } -static inline bool kvm_level_supports_block_mapping(u32 level) +static inline bool kvm_level_supports_block_mapping(s8 level) { return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL; } static inline u32 kvm_supported_block_sizes(void) { - u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL; + s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL; u32 r = 0; - for (; level < KVM_PGTABLE_MAX_LEVELS; level++) + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) r |= BIT(kvm_granule_shift(level)); return r; @@ -142,7 +170,7 @@ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); void* (*zalloc_pages_exact)(size_t size); void (*free_pages_exact)(void *addr, size_t size); - void (*free_unlinked_table)(void *addr, u32 level); + void (*free_unlinked_table)(void *addr, s8 level); void (*get_page)(void *addr); void (*put_page)(void *addr); int (*page_count)(void *addr); @@ -238,7 +266,7 @@ struct kvm_pgtable_visit_ctx { u64 start; u64 addr; u64 end; - u32 level; + s8 level; enum kvm_pgtable_walk_flags flags; }; @@ -341,7 +369,7 @@ static inline bool kvm_pgtable_walk_lock_held(void) */ struct kvm_pgtable { u32 ia_bits; - u32 start_level; + s8 start_level; kvm_pteref_t pgd; struct kvm_pgtable_mm_ops *mm_ops; @@ -475,7 +503,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); * The page-table is assumed to be unreachable by any hardware walkers prior to * freeing and therefore no TLB invalidation is performed. */ -void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level); +void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); /** * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure. @@ -499,7 +527,7 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p * an ERR_PTR(error) on failure. */ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, - u64 phys, u32 level, + u64 phys, s8 level, enum kvm_pgtable_prot prot, void *mc, bool force_pte); @@ -725,7 +753,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, * Return: 0 on success, negative error code on failure. */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, - kvm_pte_t *ptep, u32 *level); + kvm_pte_t *ptep, s8 *level); /** * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index e46250a02017..ad9cfb5c1ff4 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -56,10 +56,11 @@ static inline unsigned long hyp_vm_table_pages(void) static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { - unsigned long total = 0, i; + unsigned long total = 0; + int i; /* Provision the worst case scenario */ - for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { + for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) { nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); total += nr_pages; } diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index e9624f6326dd..483dbfa39c4c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +#define lpa2_is_enabled() false + /* * If we have userspace only BTI we don't want to mark kernel pages * guarded even if the system does support BTI. diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b19a8aee684c..79ce70fbb751 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -834,6 +834,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + /* + * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware + * dirtiness again. + */ + if (pte_sw_dirty(pte)) + pte = pte_mkdirty(pte); return pte; } diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index f4af547ef54c..2e4d7da74fb8 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -21,9 +21,22 @@ static inline bool arch_parse_debug_rodata(char *arg) extern bool rodata_enabled; extern bool rodata_full; - if (arg && !strcmp(arg, "full")) { + if (!arg) + return false; + + if (!strcmp(arg, "full")) { + rodata_enabled = rodata_full = true; + return true; + } + + if (!strcmp(arg, "off")) { + rodata_enabled = rodata_full = false; + return true; + } + + if (!strcmp(arg, "on")) { rodata_enabled = true; - rodata_full = true; + rodata_full = false; return true; } diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index d977713ec0ba..abb57bc54305 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -44,9 +44,6 @@ return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -81,6 +78,5 @@ } asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused); -#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5e65f51c10d2..c3b19b376c86 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -645,6 +645,7 @@ #define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3) #define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0) #define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1) +#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2) #define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0) #define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1) #define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4) @@ -781,10 +782,16 @@ #define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6) /* Misc instructions */ +#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4) +#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5) +#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6) +#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0) + #define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4) #define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5) #define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4) #define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) +#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6) #define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) /* Common SCTLR_ELx flags. */ @@ -871,10 +878,12 @@ /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf #define ARM64_MIN_PARANGE_BITS 32 @@ -882,6 +891,7 @@ #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7 #ifdef CONFIG_ARM64_PA_BITS_52 @@ -892,11 +902,13 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT #elif defined(CONFIG_ARM64_16K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT @@ -1039,6 +1051,19 @@ #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) +/* + * Permission Overlay Extension (POE) permission encodings. + */ +#define POE_NONE UL(0x0) +#define POE_R UL(0x1) +#define POE_X UL(0x2) +#define POE_RX UL(0x3) +#define POE_W UL(0x4) +#define POE_RW UL(0x5) +#define POE_XW UL(0x6) +#define POE_RXW UL(0x7) +#define POE_MASK UL(0xf) + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 846c563689a8..0150deb332af 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> /* - * get the tlbi levels in arm64. Default value is 0 if more than one - * of cleared_* is set or neither is set. - * Arm64 doesn't support p4ds now. + * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than + * one of cleared_* is set or neither is set - this elides the level hinting to + * the hardware. */ static inline int tlb_get_level(struct mmu_gather *tlb) { /* The TTL field is only valid for the leaf entry. */ if (tlb->freed_tables) - return 0; + return TLBI_TTL_UNKNOWN; if (tlb->cleared_ptes && !(tlb->cleared_pmds || tlb->cleared_puds || @@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb) tlb->cleared_p4ds)) return 1; - return 0; + if (tlb->cleared_p4ds && !(tlb->cleared_ptes || + tlb->cleared_pmds || + tlb->cleared_puds)) + return 0; + + return TLBI_TTL_UNKNOWN; } static inline void tlb_flush(struct mmu_gather *tlb) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bb2c2833a987..1deb5d789c2e 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void) * When ARMv8.4-TTL exists, TLBI operations take an additional hint for * the level at which the invalidation must take place. If the level is * wrong, no invalidation may take place. In the case where the level - * cannot be easily determined, a 0 value for the level parameter will - * perform a non-hinted invalidation. + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform + * a non-hinted invalidation. Any provided level outside the hint range + * will also cause fall-back to non-hinted invalidation. * * For Stage-2 invalidation, use the level values provided to that effect * in asm/stage2_pgtable.h. */ #define TLBI_TTL_MASK GENMASK_ULL(47, 44) +#define TLBI_TTL_UNKNOWN INT_MAX + #define __tlbi_level(op, addr, level) do { \ u64 arg = addr; \ \ if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \ - level) { \ + level >= 0 && level <= 3) { \ u64 ttl = level & 3; \ ttl |= get_trans_granule() << 2; \ arg &= ~TLBI_TTL_MASK; \ @@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void) } while (0) /* - * This macro creates a properly formatted VA operand for the TLB RANGE. - * The value bit assignments are: + * This macro creates a properly formatted VA operand for the TLB RANGE. The + * value bit assignments are: * * +----------+------+-------+-------+-------+----------------------+ * | ASID | TG | SCALE | NUM | TTL | BADDR | * +-----------------+-------+-------+-------+----------------------+ * |63 48|47 46|45 44|43 39|38 37|36 0| * - * The address range is determined by below formula: - * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) + * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) * + * 2^(5*SCALE + 1) * PAGESIZE) + * + * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR + * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI + * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA, + * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= (unsigned long)(ttl) << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = (baddr); \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta &= GENMASK_ULL(36, 0); \ + __ta |= __ttl << 37; \ + __ta |= (unsigned long)(num) << 39; \ + __ta |= (unsigned long)(scale) << 44; \ + __ta |= get_trans_granule() << 46; \ + __ta |= (unsigned long)(asid) << 48; \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -216,12 +225,16 @@ static inline unsigned long get_trans_granule(void) * CPUs, ensuring that any walk-cache entries associated with the * translation are also invalidated. * - * __flush_tlb_range(vma, start, end, stride, last_level) + * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level) * Invalidate the virtual-address range '[start, end)' on all * CPUs for the user address space corresponding to 'vma->mm'. * The invalidation operations are issued at a granularity * determined by 'stride' and only affect any walk-cache entries - * if 'last_level' is equal to false. + * if 'last_level' is equal to false. tlb_level is the level at + * which the invalidation must take place. If the level is wrong, + * no invalidation may take place. In the case where the level + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will + * perform a non-hinted invalidation. * * * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented @@ -345,34 +358,44 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * @tlb_level: Translation Table level hint, if known * @tlbi_user: If 'true', call an additional __tlbi_user() * (typically for user ASIDs). 'flase' for IPA instructions + * @lpa2: If 'true', the lpa2 scheme is used as set out below * * When the CPU does not support TLB range operations, flush the TLB * entries one by one at the granularity of 'stride'. If the TLB * range ops are supported, then: * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; + * 1. If FEAT_LPA2 is in use, the start address of a range operation must be + * 64KB aligned, so flush pages one by one until the alignment is reached + * using the non-range operations. This step is skipped if LPA2 is not in + * use. + * + * 2. The minimum range granularity is decided by 'scale', so multiple range + * TLBI operations may be required. Start from scale = 3, flush the largest + * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the + * requested range, then decrement scale and continue until one or zero pages + * are left. We must start from highest scale to ensure 64KB start alignment + * is maintained in the LPA2 case. * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. + * 3. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. We save this for last to + * ensure 64KB start alignment is maintained for the LPA2 case. * * Note that certain ranges can be represented by either num = 31 and * scale or num = 0 and scale + 1. The loop below favours the latter * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. */ #define __flush_tlb_range_op(op, start, pages, stride, \ - asid, tlb_level, tlbi_user) \ + asid, tlb_level, tlbi_user, lpa2) \ do { \ int num = 0; \ - int scale = 0; \ + int scale = 3; \ + int shift = lpa2 ? 16 : PAGE_SHIFT; \ unsigned long addr; \ \ while (pages > 0) { \ if (!system_supports_tlb_range() || \ - pages % 2 == 1) { \ + pages == 1 || \ + (lpa2 && start != ALIGN(start, SZ_64K))) { \ addr = __TLBI_VADDR(start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ @@ -384,20 +407,20 @@ do { \ \ num = __TLBI_RANGE_NUM(pages, scale); \ if (num >= 0) { \ - addr = __TLBI_VADDR_RANGE(start, asid, scale, \ - num, tlb_level); \ + addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ + scale, num, tlb_level); \ __tlbi(r##op, addr); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ pages -= __TLBI_RANGE_PAGES(num, scale); \ } \ - scale++; \ + scale--; \ } \ } while (0) #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ - __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled()); static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -427,9 +450,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, asid = ASID(vma->vm_mm); if (last_level) - __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); + __flush_tlb_range_op(vale1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); else - __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); + __flush_tlb_range_op(vae1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); dsb(ish); mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); @@ -441,9 +466,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, /* * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. - * Set the tlb_level to 0 because we can not get enough information here. + * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough + * information here. */ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h new file mode 100644 index 000000000000..df2c47c55972 --- /dev/null +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * System register offsets in the VNCR page + * All offsets are *byte* displacements! + */ + +#ifndef __ARM64_VNCR_MAPPING_H__ +#define __ARM64_VNCR_MAPPING_H__ + +#define VNCR_VTTBR_EL2 0x020 +#define VNCR_VTCR_EL2 0x040 +#define VNCR_VMPIDR_EL2 0x050 +#define VNCR_CNTVOFF_EL2 0x060 +#define VNCR_HCR_EL2 0x078 +#define VNCR_HSTR_EL2 0x080 +#define VNCR_VPIDR_EL2 0x088 +#define VNCR_TPIDR_EL2 0x090 +#define VNCR_HCRX_EL2 0x0A0 +#define VNCR_VNCR_EL2 0x0B0 +#define VNCR_CPACR_EL1 0x100 +#define VNCR_CONTEXTIDR_EL1 0x108 +#define VNCR_SCTLR_EL1 0x110 +#define VNCR_ACTLR_EL1 0x118 +#define VNCR_TCR_EL1 0x120 +#define VNCR_AFSR0_EL1 0x128 +#define VNCR_AFSR1_EL1 0x130 +#define VNCR_ESR_EL1 0x138 +#define VNCR_MAIR_EL1 0x140 +#define VNCR_AMAIR_EL1 0x148 +#define VNCR_MDSCR_EL1 0x158 +#define VNCR_SPSR_EL1 0x160 +#define VNCR_CNTV_CVAL_EL0 0x168 +#define VNCR_CNTV_CTL_EL0 0x170 +#define VNCR_CNTP_CVAL_EL0 0x178 +#define VNCR_CNTP_CTL_EL0 0x180 +#define VNCR_SCXTNUM_EL1 0x188 +#define VNCR_TFSR_EL1 0x190 +#define VNCR_HFGRTR_EL2 0x1B8 +#define VNCR_HFGWTR_EL2 0x1C0 +#define VNCR_HFGITR_EL2 0x1C8 +#define VNCR_HDFGRTR_EL2 0x1D0 +#define VNCR_HDFGWTR_EL2 0x1D8 +#define VNCR_ZCR_EL1 0x1E0 +#define VNCR_HAFGRTR_EL2 0x1E8 +#define VNCR_TTBR0_EL1 0x200 +#define VNCR_TTBR1_EL1 0x210 +#define VNCR_FAR_EL1 0x220 +#define VNCR_ELR_EL1 0x230 +#define VNCR_SP_EL1 0x240 +#define VNCR_VBAR_EL1 0x250 +#define VNCR_TCR2_EL1 0x270 +#define VNCR_PIRE0_EL1 0x290 +#define VNCR_PIRE0_EL2 0x298 +#define VNCR_PIR_EL1 0x2A0 +#define VNCR_ICH_LR0_EL2 0x400 +#define VNCR_ICH_LR1_EL2 0x408 +#define VNCR_ICH_LR2_EL2 0x410 +#define VNCR_ICH_LR3_EL2 0x418 +#define VNCR_ICH_LR4_EL2 0x420 +#define VNCR_ICH_LR5_EL2 0x428 +#define VNCR_ICH_LR6_EL2 0x430 +#define VNCR_ICH_LR7_EL2 0x438 +#define VNCR_ICH_LR8_EL2 0x440 +#define VNCR_ICH_LR9_EL2 0x448 +#define VNCR_ICH_LR10_EL2 0x450 +#define VNCR_ICH_LR11_EL2 0x458 +#define VNCR_ICH_LR12_EL2 0x460 +#define VNCR_ICH_LR13_EL2 0x468 +#define VNCR_ICH_LR14_EL2 0x470 +#define VNCR_ICH_LR15_EL2 0x478 +#define VNCR_ICH_AP0R0_EL2 0x480 +#define VNCR_ICH_AP0R1_EL2 0x488 +#define VNCR_ICH_AP0R2_EL2 0x490 +#define VNCR_ICH_AP0R3_EL2 0x498 +#define VNCR_ICH_AP1R0_EL2 0x4A0 +#define VNCR_ICH_AP1R1_EL2 0x4A8 +#define VNCR_ICH_AP1R2_EL2 0x4B0 +#define VNCR_ICH_AP1R3_EL2 0x4B8 +#define VNCR_ICH_HCR_EL2 0x4C0 +#define VNCR_ICH_VMCR_EL2 0x4C8 +#define VNCR_VDISR_EL2 0x500 +#define VNCR_PMBLIMITR_EL1 0x800 +#define VNCR_PMBPTR_EL1 0x810 +#define VNCR_PMBSR_EL1 0x820 +#define VNCR_PMSCR_EL1 0x828 +#define VNCR_PMSEVFR_EL1 0x830 +#define VNCR_PMSICR_EL1 0x838 +#define VNCR_PMSIRR_EL1 0x840 +#define VNCR_PMSLATFR_EL1 0x848 +#define VNCR_TRFCR_EL1 0x880 +#define VNCR_MPAM1_EL1 0x900 +#define VNCR_MPAMHCR_EL2 0x930 +#define VNCR_MPAMVPMV_EL2 0x938 +#define VNCR_MPAMVPM0_EL2 0x940 +#define VNCR_MPAMVPM1_EL2 0x948 +#define VNCR_MPAMVPM2_EL2 0x950 +#define VNCR_MPAMVPM3_EL2 0x958 +#define VNCR_MPAMVPM4_EL2 0x960 +#define VNCR_MPAMVPM5_EL2 0x968 +#define VNCR_MPAMVPM6_EL2 0x970 +#define VNCR_MPAMVPM7_EL2 0x978 + +#endif /* __ARM64_VNCR_MAPPING_H__ */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 646591c67e7a..1322652a22de 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1768,6 +1768,39 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return !meltdown_safe; } +#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2) +static bool has_lpa2_at_stage1(u64 mmfr0) +{ + unsigned int tgran; + + tgran = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_EL1_TGRAN_SHIFT); + return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2; +} + +static bool has_lpa2_at_stage2(u64 mmfr0) +{ + unsigned int tgran; + + tgran = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_EL1_TGRAN_2_SHIFT); + return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2; +} + +static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope) +{ + u64 mmfr0; + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0); +} +#else +static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope) +{ + return false; +} +#endif + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) @@ -1839,6 +1872,10 @@ static int __init __kpti_install_ng_mappings(void *__unused) static void __init kpti_install_ng_mappings(void) { + /* Check whether KPTI is going to be used */ + if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0)) + return; + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not @@ -2339,7 +2376,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_NESTED_VIRT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_nested_virt_support, - ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, IMP) + ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2) }, { .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE, @@ -2731,6 +2768,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, + { + .desc = "52-bit Virtual Addressing for KVM (LPA2)", + .capability = ARM64_HAS_LPA2, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_lpa2, + }, {}, }; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 1a777715199f..6c3c8ca73e7f 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,16 +21,14 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + select KVM_COMMON select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER - select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_XFER_TO_GUEST_WORK select KVM_VFIO - select HAVE_KVM_EVENTFD - select HAVE_KVM_IRQFD select HAVE_KVM_DIRTY_RING_ACQ_REL select NEED_KVM_DIRTY_RING_WITH_BITMAP select HAVE_KVM_MSI @@ -41,7 +39,6 @@ menuconfig KVM select HAVE_KVM_VCPU_RUN_PID_CHANGE select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS - select INTERVAL_TREE select XARRAY_MULTI help Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 13ba691b848f..9dec8c419bf4 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -295,8 +295,7 @@ static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); struct arch_timer_context *ctx; - ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu) - : vcpu_vtimer(vcpu); + ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu); return kvm_counter_compute_delta(ctx, val); } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e5f75f1f1085..a25265aca432 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -221,7 +221,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = vgic_present; break; case KVM_CAP_IOEVENTFD: - case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: @@ -410,7 +409,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); - + kvm_vgic_vcpu_destroy(vcpu); kvm_arm_vcpu_destroy(vcpu); } @@ -669,6 +668,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) return ret; } + if (vcpu_has_nv(vcpu)) { + ret = kvm_init_nv_sysregs(vcpu->kvm); + if (ret) + return ret; + } + ret = kvm_timer_enable(vcpu); if (ret) return ret; @@ -1837,6 +1842,7 @@ static int kvm_init_vector_slots(void) static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); + u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); unsigned long tcr; /* @@ -1859,6 +1865,10 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) } tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits); + tcr &= ~TCR_EL2_PS_MASK; + tcr |= FIELD_PREP(TCR_EL2_PS_MASK, kvm_get_parange(mmfr0)); + if (kvm_lpa2_is_enabled()) + tcr |= TCR_EL2_DS; params->tcr_el2 = tcr; params->pgd_pa = kvm_mmu_get_httbr(); diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 06185216a297..431fd429932d 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -1012,6 +1012,7 @@ enum fgt_group_id { HDFGRTR_GROUP, HDFGWTR_GROUP, HFGITR_GROUP, + HAFGRTR_GROUP, /* Must be last */ __NR_FGT_GROUP_IDS__ @@ -1042,10 +1043,20 @@ enum fg_filter_id { static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { /* HFGRTR_EL2, HFGWTR_EL2 */ + SR_FGT(SYS_AMAIR2_EL1, HFGxTR, nAMAIR2_EL1, 0), + SR_FGT(SYS_MAIR2_EL1, HFGxTR, nMAIR2_EL1, 0), + SR_FGT(SYS_S2POR_EL1, HFGxTR, nS2POR_EL1, 0), + SR_FGT(SYS_POR_EL1, HFGxTR, nPOR_EL1, 0), + SR_FGT(SYS_POR_EL0, HFGxTR, nPOR_EL0, 0), SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0), SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0), + SR_FGT(SYS_RCWMASK_EL1, HFGxTR, nRCWMASK_EL1, 0), SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0), SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0), + SR_FGT(SYS_GCSCR_EL1, HFGxTR, nGCS_EL1, 0), + SR_FGT(SYS_GCSPR_EL1, HFGxTR, nGCS_EL1, 0), + SR_FGT(SYS_GCSCRE0_EL1, HFGxTR, nGCS_EL0, 0), + SR_FGT(SYS_GCSPR_EL0, HFGxTR, nGCS_EL0, 0), SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0), SR_FGT(SYS_ERXADDR_EL1, HFGxTR, ERXADDR_EL1, 1), SR_FGT(SYS_ERXPFGCDN_EL1, HFGxTR, ERXPFGCDN_EL1, 1), @@ -1107,6 +1118,11 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { SR_FGT(SYS_AFSR1_EL1, HFGxTR, AFSR1_EL1, 1), SR_FGT(SYS_AFSR0_EL1, HFGxTR, AFSR0_EL1, 1), /* HFGITR_EL2 */ + SR_FGT(OP_AT_S1E1A, HFGITR, ATS1E1A, 1), + SR_FGT(OP_COSP_RCTX, HFGITR, COSPRCTX, 1), + SR_FGT(OP_GCSPUSHX, HFGITR, nGCSEPP, 0), + SR_FGT(OP_GCSPOPX, HFGITR, nGCSEPP, 0), + SR_FGT(OP_GCSPUSHM, HFGITR, nGCSPUSHM_EL1, 0), SR_FGT(OP_BRB_IALL, HFGITR, nBRBIALL, 0), SR_FGT(OP_BRB_INJ, HFGITR, nBRBINJ, 0), SR_FGT(SYS_DC_CVAC, HFGITR, DCCVAC, 1), @@ -1674,6 +1690,49 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { SR_FGT(SYS_PMCR_EL0, HDFGWTR, PMCR_EL0, 1), SR_FGT(SYS_PMSWINC_EL0, HDFGWTR, PMSWINC_EL0, 1), SR_FGT(SYS_OSLAR_EL1, HDFGWTR, OSLAR_EL1, 1), + /* + * HAFGRTR_EL2 + */ + SR_FGT(SYS_AMEVTYPER1_EL0(15), HAFGRTR, AMEVTYPER115_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(14), HAFGRTR, AMEVTYPER114_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(13), HAFGRTR, AMEVTYPER113_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(12), HAFGRTR, AMEVTYPER112_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(11), HAFGRTR, AMEVTYPER111_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(10), HAFGRTR, AMEVTYPER110_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(9), HAFGRTR, AMEVTYPER19_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(8), HAFGRTR, AMEVTYPER18_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(7), HAFGRTR, AMEVTYPER17_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(6), HAFGRTR, AMEVTYPER16_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(5), HAFGRTR, AMEVTYPER15_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(4), HAFGRTR, AMEVTYPER14_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(3), HAFGRTR, AMEVTYPER13_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(2), HAFGRTR, AMEVTYPER12_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(1), HAFGRTR, AMEVTYPER11_EL0, 1), + SR_FGT(SYS_AMEVTYPER1_EL0(0), HAFGRTR, AMEVTYPER10_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(15), HAFGRTR, AMEVCNTR115_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(14), HAFGRTR, AMEVCNTR114_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(13), HAFGRTR, AMEVCNTR113_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(12), HAFGRTR, AMEVCNTR112_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(11), HAFGRTR, AMEVCNTR111_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(10), HAFGRTR, AMEVCNTR110_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(9), HAFGRTR, AMEVCNTR19_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(8), HAFGRTR, AMEVCNTR18_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(7), HAFGRTR, AMEVCNTR17_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(6), HAFGRTR, AMEVCNTR16_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(5), HAFGRTR, AMEVCNTR15_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(4), HAFGRTR, AMEVCNTR14_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(3), HAFGRTR, AMEVCNTR13_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(2), HAFGRTR, AMEVCNTR12_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(1), HAFGRTR, AMEVCNTR11_EL0, 1), + SR_FGT(SYS_AMEVCNTR1_EL0(0), HAFGRTR, AMEVCNTR10_EL0, 1), + SR_FGT(SYS_AMCNTENCLR1_EL0, HAFGRTR, AMCNTEN1, 1), + SR_FGT(SYS_AMCNTENSET1_EL0, HAFGRTR, AMCNTEN1, 1), + SR_FGT(SYS_AMCNTENCLR0_EL0, HAFGRTR, AMCNTEN0, 1), + SR_FGT(SYS_AMCNTENSET0_EL0, HAFGRTR, AMCNTEN0, 1), + SR_FGT(SYS_AMEVCNTR0_EL0(3), HAFGRTR, AMEVCNTR03_EL0, 1), + SR_FGT(SYS_AMEVCNTR0_EL0(2), HAFGRTR, AMEVCNTR02_EL0, 1), + SR_FGT(SYS_AMEVCNTR0_EL0(1), HAFGRTR, AMEVCNTR01_EL0, 1), + SR_FGT(SYS_AMEVCNTR0_EL0(0), HAFGRTR, AMEVCNTR00_EL0, 1), }; static union trap_config get_trap_config(u32 sysreg) @@ -1894,6 +1953,10 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu) val = sanitised_sys_reg(vcpu, HDFGWTR_EL2); break; + case HAFGRTR_GROUP: + val = sanitised_sys_reg(vcpu, HAFGRTR_EL2); + break; + case HFGITR_GROUP: val = sanitised_sys_reg(vcpu, HFGITR_EL2); switch (tc.fgf) { diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 9ddcfe2c3e57..9e13c1bc2ad5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) */ if (!(esr & ESR_ELx_S1PTW) && (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) { + esr_fsc_is_permission_fault(esr))) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; } else { diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f99d8af0b9af..a038320cdb08 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -79,6 +79,45 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) clr |= ~hfg & __ ## reg ## _nMASK; \ } while(0) +#define update_fgt_traps_cs(vcpu, reg, clr, set) \ + do { \ + struct kvm_cpu_context *hctxt = \ + &this_cpu_ptr(&kvm_host_data)->host_ctxt; \ + u64 c = 0, s = 0; \ + \ + ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ + compute_clr_set(vcpu, reg, c, s); \ + s |= set; \ + c |= clr; \ + if (c || s) { \ + u64 val = __ ## reg ## _nMASK; \ + val |= s; \ + val &= ~c; \ + write_sysreg_s(val, SYS_ ## reg); \ + } \ + } while(0) + +#define update_fgt_traps(vcpu, reg) \ + update_fgt_traps_cs(vcpu, reg, 0, 0) + +/* + * Validate the fine grain trap masks. + * Check that the masks do not overlap and that all bits are accounted for. + */ +#define CHECK_FGT_MASKS(reg) \ + do { \ + BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \ + BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \ + (__ ## reg ## _nMASK))); \ + } while(0) + +static inline bool cpu_has_amu(void) +{ + u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); + + return cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_EL1_AMU_SHIFT); +} static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { @@ -86,6 +125,14 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; u64 r_val, w_val; + CHECK_FGT_MASKS(HFGRTR_EL2); + CHECK_FGT_MASKS(HFGWTR_EL2); + CHECK_FGT_MASKS(HFGITR_EL2); + CHECK_FGT_MASKS(HDFGRTR_EL2); + CHECK_FGT_MASKS(HDFGWTR_EL2); + CHECK_FGT_MASKS(HAFGRTR_EL2); + CHECK_FGT_MASKS(HCRX_EL2); + if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; @@ -110,12 +157,15 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set); } - /* The default is not to trap anything but ACCDATA_EL1 */ - r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1; + /* The default to trap everything not handled or supported in KVM. */ + tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 | + HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1; + + r_val = __HFGRTR_EL2_nMASK & ~tmp; r_val |= r_set; r_val &= ~r_clr; - w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1; + w_val = __HFGWTR_EL2_nMASK & ~tmp; w_val |= w_set; w_val &= ~w_clr; @@ -125,34 +175,12 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) return; - ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2); + update_fgt_traps(vcpu, HFGITR_EL2); + update_fgt_traps(vcpu, HDFGRTR_EL2); + update_fgt_traps(vcpu, HDFGWTR_EL2); - r_set = r_clr = 0; - compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set); - r_val = __HFGITR_EL2_nMASK; - r_val |= r_set; - r_val &= ~r_clr; - - write_sysreg_s(r_val, SYS_HFGITR_EL2); - - ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2); - ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2); - - r_clr = r_set = w_clr = w_set = 0; - - compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set); - compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set); - - r_val = __HDFGRTR_EL2_nMASK; - r_val |= r_set; - r_val &= ~r_clr; - - w_val = __HDFGWTR_EL2_nMASK; - w_val |= w_set; - w_val &= ~w_clr; - - write_sysreg_s(r_val, SYS_HDFGRTR_EL2); - write_sysreg_s(w_val, SYS_HDFGWTR_EL2); + if (cpu_has_amu()) + update_fgt_traps(vcpu, HAFGRTR_EL2); } static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) @@ -171,6 +199,9 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2); write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2); write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2); + + if (cpu_has_amu()) + write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2); } static inline void __activate_traps_common(struct kvm_vcpu *vcpu) @@ -591,7 +622,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { bool valid; - valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT && + valid = kvm_vcpu_trap_is_translation_fault(vcpu) && kvm_vcpu_dabt_isvalid(vcpu) && !kvm_vcpu_abt_issea(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu); diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index e91922daa8ca..51f043649146 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -69,6 +69,8 @@ ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \ ) +#define PVM_ID_AA64PFR2_ALLOW 0ULL + /* * Allow for protected VMs: * - Mixed-endian @@ -101,6 +103,7 @@ * - Privileged Access Never * - SError interrupt exceptions from speculative reads * - Enhanced Translation Synchronization + * - Control for cache maintenance permission */ #define PVM_ID_AA64MMFR1_ALLOW (\ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \ @@ -108,7 +111,8 @@ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \ ) /* @@ -133,6 +137,8 @@ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \ ) +#define PVM_ID_AA64MMFR3_ALLOW (0ULL) + /* * No support for Scalable Vectors for protected VMs: * Requires additional support from KVM, e.g., context-switching and @@ -178,10 +184,18 @@ ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \ ) +/* Restrict pointer authentication to the basic version. */ +#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \ + ) + +#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \ + ) + #define PVM_ID_AA64ISAR1_ALLOW (\ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ @@ -196,8 +210,8 @@ ) #define PVM_ID_AA64ISAR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | \ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \ ) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 1cc06e6797bd..2994878d68ea 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -122,11 +122,7 @@ alternative_if ARM64_HAS_CNP alternative_else_nop_endif msr ttbr0_el2, x2 - /* - * Set the PS bits in TCR_EL2. - */ ldr x0, [x0, #NVHE_INIT_TCR_EL2] - tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 msr tcr_el2, x0 isb @@ -292,6 +288,8 @@ alternative_else_nop_endif mov sp, x0 /* And turn the MMU back on! */ + dsb nsh + isb set_sctlr_el2 x2 ret x1 SYM_FUNC_END(__pkvm_init_switch_pgd) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8d0a5834e883..861c76021a25 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -91,7 +91,7 @@ static void host_s2_put_page(void *addr) hyp_put_page(&host_s2_pool, addr); } -static void host_s2_free_unlinked_table(void *addr, u32 level) +static void host_s2_free_unlinked_table(void *addr, s8 level) { kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level); } @@ -443,7 +443,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; - u32 level; + s8 level; int ret; hyp_assert_lock_held(&host_mmu.lock); @@ -462,7 +462,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; level++; - } while ((level < KVM_PGTABLE_MAX_LEVELS) && + } while ((level <= KVM_PGTABLE_LAST_LEVEL) && !(kvm_level_supports_block_mapping(level) && range_included(&cur, range))); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 65a7a186d7b2..b01a3d1078a8 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -260,7 +260,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03 */ dsb(ishst); - __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1)); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL); dsb(ish); isb(); } @@ -275,7 +275,7 @@ static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx, { struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg); - if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1) + if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL) return -EINVAL; slot->addr = ctx->addr; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 9d23a51d7f75..84b5c3f387d8 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -136,6 +136,10 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) cptr_set |= CPTR_EL2_TTA; } + /* Trap External Trace */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids)) + mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + vcpu->arch.mdcr_el2 |= mdcr_set; vcpu->arch.mdcr_el2 &= ~mdcr_clear; vcpu->arch.cptr_el2 |= cptr_set; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0d5e0a89ddce..bc58d1b515af 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -181,7 +181,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, if (!kvm_pte_valid(ctx->old)) return 0; - if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1)) + if (ctx->level != KVM_PGTABLE_LAST_LEVEL) return -EINVAL; phys = kvm_pte_to_phys(ctx->old); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1966fdee740e..c651df904fe3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -79,7 +79,10 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx) static bool kvm_phys_is_valid(u64 phys) { - return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); + u64 parange_max = kvm_get_parange_max(); + u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max); + + return phys < BIT(shift); } static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) @@ -98,7 +101,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, return IS_ALIGNED(ctx->addr, granule); } -static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) +static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level) { u64 shift = kvm_granule_shift(level); u64 mask = BIT(PAGE_SHIFT - 3) - 1; @@ -114,7 +117,7 @@ static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) return (addr & mask) >> shift; } -static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) +static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level) { struct kvm_pgtable pgt = { .ia_bits = ia_bits, @@ -124,9 +127,9 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) return kvm_pgd_page_idx(&pgt, -1ULL) + 1; } -static bool kvm_pte_table(kvm_pte_t pte, u32 level) +static bool kvm_pte_table(kvm_pte_t pte, s8 level) { - if (level == KVM_PGTABLE_MAX_LEVELS - 1) + if (level == KVM_PGTABLE_LAST_LEVEL) return false; if (!kvm_pte_valid(pte)) @@ -154,11 +157,11 @@ static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops return pte; } -static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) +static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level) { kvm_pte_t pte = kvm_phys_to_pte(pa); - u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE : - KVM_PTE_TYPE_BLOCK; + u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE : + KVM_PTE_TYPE_BLOCK; pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI); pte |= FIELD_PREP(KVM_PTE_TYPE, type); @@ -203,11 +206,11 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker, } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level); + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level); static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, struct kvm_pgtable_mm_ops *mm_ops, - kvm_pteref_t pteref, u32 level) + kvm_pteref_t pteref, s8 level) { enum kvm_pgtable_walk_flags flags = data->walker->flags; kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref); @@ -272,12 +275,13 @@ out: } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level) + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level) { u32 idx; int ret = 0; - if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS)) + if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL || + level > KVM_PGTABLE_LAST_LEVEL)) return -EINVAL; for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { @@ -340,7 +344,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct leaf_walk_data { kvm_pte_t pte; - u32 level; + s8 level; }; static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, @@ -355,7 +359,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, } int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, - kvm_pte_t *ptep, u32 *level) + kvm_pte_t *ptep, s8 *level) { struct leaf_walk_data data; struct kvm_pgtable_walker walker = { @@ -408,7 +412,8 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) } attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); - attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); + if (!kvm_lpa2_is_enabled()) + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; @@ -467,7 +472,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, if (hyp_map_walker_try_leaf(ctx, data)) return 0; - if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL)) return -EINVAL; childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL); @@ -563,14 +568,19 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, struct kvm_pgtable_mm_ops *mm_ops) { - u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); + s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 - + ARM64_HW_PGTABLE_LEVELS(va_bits); + + if (start_level < KVM_PGTABLE_FIRST_LEVEL || + start_level > KVM_PGTABLE_LAST_LEVEL) + return -EINVAL; pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL); if (!pgt->pgd) return -ENOMEM; pgt->ia_bits = va_bits; - pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; + pgt->start_level = start_level; pgt->mm_ops = mm_ops; pgt->mmu = NULL; pgt->force_pte_cb = NULL; @@ -624,7 +634,7 @@ struct stage2_map_data { u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) { u64 vtcr = VTCR_EL2_FLAGS; - u8 lvls; + s8 lvls; vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT; vtcr |= VTCR_EL2_T0SZ(phys_shift); @@ -635,6 +645,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) lvls = stage2_pgtable_levels(phys_shift); if (lvls < 2) lvls = 2; + + /* + * When LPA2 is enabled, the HW supports an extra level of translation + * (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2 + * to as an addition to SL0 to enable encoding this extra start level. + * However, since we always use concatenated pages for the first level + * lookup, we will never need this extra level and therefore do not need + * to touch SL2. + */ vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); #ifdef CONFIG_ARM64_HW_AFDBM @@ -654,6 +673,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_HA; #endif /* CONFIG_ARM64_HW_AFDBM */ + if (kvm_lpa2_is_enabled()) + vtcr |= VTCR_EL2_DS; + /* Set the vmid bits */ vtcr |= (get_vmid_bits(mmfr1) == 16) ? VTCR_EL2_VS_16BIT : @@ -711,7 +733,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p if (prot & KVM_PGTABLE_PROT_W) attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; - attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); + if (!kvm_lpa2_is_enabled()) + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); + attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; @@ -902,7 +926,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, { u64 phys = stage2_map_walker_phys_addr(ctx, data); - if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1))) + if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL) return false; return kvm_block_mapping_supported(ctx, phys); @@ -981,7 +1005,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (ret != -E2BIG) return ret; - if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL)) return -EINVAL; if (!data->memcache) @@ -1151,7 +1175,7 @@ struct stage2_attr_data { kvm_pte_t attr_set; kvm_pte_t attr_clr; kvm_pte_t pte; - u32 level; + s8 level; }; static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, @@ -1194,7 +1218,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, u64 size, kvm_pte_t attr_set, kvm_pte_t attr_clr, kvm_pte_t *orig_pte, - u32 *level, enum kvm_pgtable_walk_flags flags) + s8 *level, enum kvm_pgtable_walk_flags flags) { int ret; kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; @@ -1296,7 +1320,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot) { int ret; - u32 level; + s8 level; kvm_pte_t set = 0, clr = 0; if (prot & KVM_PTE_LEAF_ATTR_HI_SW) @@ -1349,7 +1373,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) } kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, - u64 phys, u32 level, + u64 phys, s8 level, enum kvm_pgtable_prot prot, void *mc, bool force_pte) { @@ -1407,7 +1431,7 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, * fully populated tree up to the PTE entries. Note that @level is * interpreted as in "level @level entry". */ -static int stage2_block_get_nr_page_tables(u32 level) +static int stage2_block_get_nr_page_tables(s8 level) { switch (level) { case 1: @@ -1418,7 +1442,7 @@ static int stage2_block_get_nr_page_tables(u32 level) return 0; default: WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL || - level >= KVM_PGTABLE_MAX_LEVELS); + level > KVM_PGTABLE_LAST_LEVEL); return -EINVAL; }; } @@ -1431,13 +1455,13 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu; kvm_pte_t pte = ctx->old, new, *childp; enum kvm_pgtable_prot prot; - u32 level = ctx->level; + s8 level = ctx->level; bool force_pte; int nr_pages; u64 phys; /* No huge-pages exist at the last level */ - if (level == KVM_PGTABLE_MAX_LEVELS - 1) + if (level == KVM_PGTABLE_LAST_LEVEL) return 0; /* We only split valid block mappings */ @@ -1514,7 +1538,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, u64 vtcr = mmu->vtcr; u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); - u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz); @@ -1537,7 +1561,7 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) { u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); - u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; } @@ -1573,7 +1597,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) pgt->pgd = NULL; } -void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level) +void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { kvm_pteref_t ptep = (kvm_pteref_t)pgtable; struct kvm_pgtable_walker walker = { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index d87c8fcc4c24..d14504821b79 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -223,12 +223,12 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head) { struct page *page = container_of(head, struct page, rcu_head); void *pgtable = page_to_virt(page); - u32 level = page_private(page); + s8 level = page_private(page); kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level); } -static void stage2_free_unlinked_table(void *addr, u32 level) +static void stage2_free_unlinked_table(void *addr, s8 level) { struct page *page = virt_to_page(addr); @@ -804,13 +804,13 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr) struct kvm_pgtable pgt = { .pgd = (kvm_pteref_t)kvm->mm->pgd, .ia_bits = vabits_actual, - .start_level = (KVM_PGTABLE_MAX_LEVELS - - CONFIG_PGTABLE_LEVELS), + .start_level = (KVM_PGTABLE_LAST_LEVEL - + CONFIG_PGTABLE_LEVELS + 1), .mm_ops = &kvm_user_mm_ops, }; unsigned long flags; kvm_pte_t pte = 0; /* Keep GCC quiet... */ - u32 level = ~0; + s8 level = S8_MAX; int ret; /* @@ -829,7 +829,9 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr) * Not seeing an error, but not updating level? Something went * deeply wrong... */ - if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS)) + if (WARN_ON(level > KVM_PGTABLE_LAST_LEVEL)) + return -EFAULT; + if (WARN_ON(level < KVM_PGTABLE_FIRST_LEVEL)) return -EFAULT; /* Oops, the userspace PTs are gone... Replay the fault */ @@ -1374,7 +1376,7 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, - unsigned long fault_status) + bool fault_is_perm) { int ret = 0; bool write_fault, writable, force_pte = false; @@ -1388,17 +1390,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; - fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); + if (fault_is_perm) + fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu); write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); - if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) { + if (fault_is_perm && !write_fault && !exec_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; } @@ -1409,8 +1411,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * only exception to this is when dirty logging is enabled at runtime * and a write fault needs to collapse a block entry into a table. */ - if (fault_status != ESR_ELx_FSC_PERM || - (logging_active && write_fault)) { + if (!fault_is_perm || (logging_active && write_fault)) { ret = kvm_mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); if (ret) @@ -1527,8 +1528,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * backed by a THP and thus use block mapping if possible. */ if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { - if (fault_status == ESR_ELx_FSC_PERM && - fault_granule > PAGE_SIZE) + if (fault_is_perm && fault_granule > PAGE_SIZE) vma_pagesize = fault_granule; else vma_pagesize = transparent_hugepage_adjust(kvm, memslot, @@ -1541,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } } - if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) { + if (!fault_is_perm && !device && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new disallowed VMA */ if (mte_allowed) { sanitise_mte_tags(kvm, pfn, vma_pagesize); @@ -1567,7 +1567,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * permissions only if vma_pagesize equals fault_granule. Otherwise, * kvm_pgtable_stage2_map() should be called to change block size. */ - if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule) + if (fault_is_perm && vma_pagesize == fault_granule) ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); else ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, @@ -1618,7 +1618,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) */ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) { - unsigned long fault_status; + unsigned long esr; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; unsigned long hva; @@ -1626,12 +1626,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; - fault_status = kvm_vcpu_trap_get_fault_type(vcpu); + esr = kvm_vcpu_get_esr(vcpu); fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - if (fault_status == ESR_ELx_FSC_FAULT) { + if (esr_fsc_is_permission_fault(esr)) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { kvm_inject_size_fault(vcpu); @@ -1666,9 +1666,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - if (fault_status != ESR_ELx_FSC_FAULT && - fault_status != ESR_ELx_FSC_PERM && - fault_status != ESR_ELx_FSC_ACCESS) { + if (!esr_fsc_is_translation_fault(esr) && + !esr_fsc_is_permission_fault(esr) && + !esr_fsc_is_access_flag_fault(esr)) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), @@ -1730,13 +1730,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) /* Userspace should not be able to register out-of-bounds IPAs */ VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu)); - if (fault_status == ESR_ELx_FSC_ACCESS) { + if (esr_fsc_is_access_flag_fault(esr)) { handle_access_fault(vcpu, fault_ipa); ret = 1; goto out_unlock; } - ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, + esr_fsc_is_permission_fault(esr)); if (ret == 0) ret = 1; out: diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 042695a210ce..ba95d044bc98 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -23,13 +23,9 @@ * This list should get updated as new features get added to the NV * support, and new extension to the architecture. */ -void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static u64 limit_nv_id_reg(u32 id, u64 val) { - u32 id = reg_to_encoding(r); - u64 val, tmp; - - val = p->regval; + u64 tmp; switch (id) { case SYS_ID_AA64ISAR0_EL1: @@ -158,5 +154,17 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p, break; } - p->regval = val; + return val; +} +int kvm_init_nv_sysregs(struct kvm *kvm) +{ + mutex_lock(&kvm->arch.config_lock); + + for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++) + kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i), + kvm->arch.id_regs[i]); + + mutex_unlock(&kvm->arch.config_lock); + + return 0; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5bb4de162cab..68d1d05672bd 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -280,12 +280,11 @@ int __init kvm_set_ipa_limit(void) parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); /* - * IPA size beyond 48 bits could not be supported - * on either 4K or 16K page size. Hence let's cap - * it to 48 bits, in case it's reported as larger - * on the system. + * IPA size beyond 48 bits for 4K and 16K page size is only supported + * when LPA2 is available. So if we have LPA2, enable it, else cap to 48 + * bits, in case it's reported as larger on the system. */ - if (PAGE_SIZE != SZ_64K) + if (!kvm_lpa2_is_enabled() && PAGE_SIZE != SZ_64K) parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48); /* diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4735e1b37fb3..b4d3486d66ff 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -45,44 +45,170 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg); static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val); -static bool read_from_write_only(struct kvm_vcpu *vcpu, - struct sys_reg_params *params, - const struct sys_reg_desc *r) +static bool bad_trap(struct kvm_vcpu *vcpu, + struct sys_reg_params *params, + const struct sys_reg_desc *r, + const char *msg) { - WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n"); + WARN_ONCE(1, "Unexpected %s\n", msg); print_sys_reg_instr(params); kvm_inject_undefined(vcpu); return false; } +static bool read_from_write_only(struct kvm_vcpu *vcpu, + struct sys_reg_params *params, + const struct sys_reg_desc *r) +{ + return bad_trap(vcpu, params, r, + "sys_reg read to write-only register"); +} + static bool write_to_read_only(struct kvm_vcpu *vcpu, struct sys_reg_params *params, const struct sys_reg_desc *r) { - WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n"); - print_sys_reg_instr(params); - kvm_inject_undefined(vcpu); - return false; + return bad_trap(vcpu, params, r, + "sys_reg write to read-only register"); +} + +#define PURE_EL2_SYSREG(el2) \ + case el2: { \ + *el1r = el2; \ + return true; \ + } + +#define MAPPED_EL2_SYSREG(el2, el1, fn) \ + case el2: { \ + *xlate = fn; \ + *el1r = el1; \ + return true; \ + } + +static bool get_el2_to_el1_mapping(unsigned int reg, + unsigned int *el1r, u64 (**xlate)(u64)) +{ + switch (reg) { + PURE_EL2_SYSREG( VPIDR_EL2 ); + PURE_EL2_SYSREG( VMPIDR_EL2 ); + PURE_EL2_SYSREG( ACTLR_EL2 ); + PURE_EL2_SYSREG( HCR_EL2 ); + PURE_EL2_SYSREG( MDCR_EL2 ); + PURE_EL2_SYSREG( HSTR_EL2 ); + PURE_EL2_SYSREG( HACR_EL2 ); + PURE_EL2_SYSREG( VTTBR_EL2 ); + PURE_EL2_SYSREG( VTCR_EL2 ); + PURE_EL2_SYSREG( RVBAR_EL2 ); + PURE_EL2_SYSREG( TPIDR_EL2 ); + PURE_EL2_SYSREG( HPFAR_EL2 ); + PURE_EL2_SYSREG( CNTHCTL_EL2 ); + MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1, + translate_sctlr_el2_to_sctlr_el1 ); + MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1, + translate_cptr_el2_to_cpacr_el1 ); + MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1, + translate_ttbr0_el2_to_ttbr0_el1 ); + MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1, NULL ); + MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1, + translate_tcr_el2_to_tcr_el1 ); + MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1, NULL ); + MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1, NULL ); + MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1, NULL ); + MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL ); + MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL ); + MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL ); + MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); + MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); + MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); + default: + return false; + } } u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { u64 val = 0x8badf00d8badf00d; + u64 (*xlate)(u64) = NULL; + unsigned int el1r; + + if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) + goto memory_read; - if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) && - __vcpu_read_sys_reg_from_cpu(reg, &val)) + if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { + if (!is_hyp_ctxt(vcpu)) + goto memory_read; + + /* + * If this register does not have an EL1 counterpart, + * then read the stored EL2 version. + */ + if (reg == el1r) + goto memory_read; + + /* + * If we have a non-VHE guest and that the sysreg + * requires translation to be used at EL1, use the + * in-memory copy instead. + */ + if (!vcpu_el2_e2h_is_set(vcpu) && xlate) + goto memory_read; + + /* Get the current version of the EL1 counterpart. */ + WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val)); return val; + } + /* EL1 register can't be on the CPU if the guest is in vEL2. */ + if (unlikely(is_hyp_ctxt(vcpu))) + goto memory_read; + + if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + +memory_read: return __vcpu_sys_reg(vcpu, reg); } void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) { - if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) && - __vcpu_write_sys_reg_to_cpu(val, reg)) + u64 (*xlate)(u64) = NULL; + unsigned int el1r; + + if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) + goto memory_write; + + if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { + if (!is_hyp_ctxt(vcpu)) + goto memory_write; + + /* + * Always store a copy of the write to memory to avoid having + * to reverse-translate virtual EL2 system registers for a + * non-VHE guest hypervisor. + */ + __vcpu_sys_reg(vcpu, reg) = val; + + /* No EL1 counterpart? We're done here.? */ + if (reg == el1r) + return; + + if (!vcpu_el2_e2h_is_set(vcpu) && xlate) + val = xlate(val); + + /* Redirect this to the EL1 version of the register. */ + WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r)); + return; + } + + /* EL1 register can't be on the CPU if the guest is in vEL2. */ + if (unlikely(is_hyp_ctxt(vcpu))) + goto memory_write; + + if (__vcpu_write_sys_reg_to_cpu(val, reg)) return; - __vcpu_sys_reg(vcpu, reg) = val; +memory_write: + __vcpu_sys_reg(vcpu, reg) = val; } /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ @@ -1505,8 +1631,6 @@ static bool access_id_reg(struct kvm_vcpu *vcpu, return write_to_read_only(vcpu, p, r); p->regval = read_id_reg(vcpu, r); - if (vcpu_has_nv(vcpu)) - access_nested_id_reg(vcpu, p, r); return true; } @@ -1885,6 +2009,32 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static bool bad_vncr_trap(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + /* + * We really shouldn't be here, and this is likely the result + * of a misconfigured trap, as this register should target the + * VNCR page, and nothing else. + */ + return bad_trap(vcpu, p, r, + "trap of VNCR-backed register"); +} + +static bool bad_redir_trap(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + /* + * We really shouldn't be here, and this is likely the result + * of a misconfigured trap, as this register should target the + * corresponding EL1, and nothing else. + */ + return bad_trap(vcpu, p, r, + "trap of EL2 register redirected to EL1"); +} + #define EL2_REG(name, acc, rst, v) { \ SYS_DESC(SYS_##name), \ .access = acc, \ @@ -1894,6 +2044,9 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, .val = v, \ } +#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v) +#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) + /* * EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when * HCR_EL2.E2H==1, and only in the sysreg table for convenience of @@ -2508,32 +2661,33 @@ static const struct sys_reg_desc sys_reg_descs[] = { { PMU_SYS_REG(PMCCFILTR_EL0), .access = access_pmu_evtyper, .reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 }, - EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0), - EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0), + EL2_REG_VNCR(VPIDR_EL2, reset_unknown, 0), + EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0), EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), - EL2_REG(HCR_EL2, access_rw, reset_val, 0), + EL2_REG_VNCR(HCR_EL2, reset_val, 0), EL2_REG(MDCR_EL2, access_rw, reset_val, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), - EL2_REG(HSTR_EL2, access_rw, reset_val, 0), - EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0), - EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0), - EL2_REG(HFGITR_EL2, access_rw, reset_val, 0), - EL2_REG(HACR_EL2, access_rw, reset_val, 0), + EL2_REG_VNCR(HSTR_EL2, reset_val, 0), + EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0), + EL2_REG_VNCR(HFGWTR_EL2, reset_val, 0), + EL2_REG_VNCR(HFGITR_EL2, reset_val, 0), + EL2_REG_VNCR(HACR_EL2, reset_val, 0), - EL2_REG(HCRX_EL2, access_rw, reset_val, 0), + EL2_REG_VNCR(HCRX_EL2, reset_val, 0), EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), EL2_REG(TTBR1_EL2, access_rw, reset_val, 0), EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1), - EL2_REG(VTTBR_EL2, access_rw, reset_val, 0), - EL2_REG(VTCR_EL2, access_rw, reset_val, 0), + EL2_REG_VNCR(VTTBR_EL2, reset_val, 0), + EL2_REG_VNCR(VTCR_EL2, reset_val, 0), { SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 }, - EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0), - EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0), - EL2_REG(SPSR_EL2, access_rw, reset_val, 0), - EL2_REG(ELR_EL2, access_rw, reset_val, 0), + EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0), + EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0), + EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0), + EL2_REG_REDIR(SPSR_EL2, reset_val, 0), + EL2_REG_REDIR(ELR_EL2, reset_val, 0), { SYS_DESC(SYS_SP_EL1), access_sp_el1}, /* AArch32 SPSR_* are RES0 if trapped from a NV guest */ @@ -2549,10 +2703,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 }, EL2_REG(AFSR0_EL2, access_rw, reset_val, 0), EL2_REG(AFSR1_EL2, access_rw, reset_val, 0), - EL2_REG(ESR_EL2, access_rw, reset_val, 0), + EL2_REG_REDIR(ESR_EL2, reset_val, 0), { SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 }, - EL2_REG(FAR_EL2, access_rw, reset_val, 0), + EL2_REG_REDIR(FAR_EL2, reset_val, 0), EL2_REG(HPFAR_EL2, access_rw, reset_val, 0), EL2_REG(MAIR_EL2, access_rw, reset_val, 0), @@ -2565,24 +2719,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0), EL2_REG(TPIDR_EL2, access_rw, reset_val, 0), - EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0), + EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), - EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078), - EL12_REG(CPACR, access_rw, reset_val, 0), - EL12_REG(TTBR0, access_vm_reg, reset_unknown, 0), - EL12_REG(TTBR1, access_vm_reg, reset_unknown, 0), - EL12_REG(TCR, access_vm_reg, reset_val, 0), - { SYS_DESC(SYS_SPSR_EL12), access_spsr}, - { SYS_DESC(SYS_ELR_EL12), access_elr}, - EL12_REG(AFSR0, access_vm_reg, reset_unknown, 0), - EL12_REG(AFSR1, access_vm_reg, reset_unknown, 0), - EL12_REG(ESR, access_vm_reg, reset_unknown, 0), - EL12_REG(FAR, access_vm_reg, reset_unknown, 0), - EL12_REG(MAIR, access_vm_reg, reset_unknown, 0), - EL12_REG(AMAIR, access_vm_reg, reset_amair_el1, 0), - EL12_REG(VBAR, access_rw, reset_val, 0), - EL12_REG(CONTEXTIDR, access_vm_reg, reset_val, 0), EL12_REG(CNTKCTL, access_rw, reset_val, 0), EL2_REG(SP_EL2, NULL, reset_unknown, 0), diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index c8c3cb812783..e949e1d0fd9f 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -368,7 +368,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) vgic_v4_teardown(kvm); } -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -379,29 +379,39 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_flush_pending_lpis(vcpu); INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_unregister_redist_iodev(vcpu); + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + } } -static void __kvm_vgic_destroy(struct kvm *kvm) +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->slots_lock); + __kvm_vgic_vcpu_destroy(vcpu); + mutex_unlock(&kvm->slots_lock); +} + +void kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; - lockdep_assert_held(&kvm->arch.config_lock); + mutex_lock(&kvm->slots_lock); vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_destroy(vcpu); + __kvm_vgic_vcpu_destroy(vcpu); + + mutex_lock(&kvm->arch.config_lock); kvm_vgic_dist_destroy(kvm); -} -void kvm_vgic_destroy(struct kvm *kvm) -{ - mutex_lock(&kvm->arch.config_lock); - __kvm_vgic_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); } /** @@ -469,25 +479,26 @@ int kvm_vgic_map_resources(struct kvm *kvm) type = VGIC_V3; } - if (ret) { - __kvm_vgic_destroy(kvm); + if (ret) goto out; - } + dist->ready = true; dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); ret = vgic_register_dist_iodev(kvm, dist_base, type); - if (ret) { + if (ret) kvm_err("Unable to register VGIC dist MMIO regions\n"); - kvm_vgic_destroy(kvm); - } - mutex_unlock(&kvm->slots_lock); - return ret; + goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); +out_slots: mutex_unlock(&kvm->slots_lock); + + if (ret) + kvm_vgic_destroy(kvm); + return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 2dad2d095160..e2764d0ffa9f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -590,7 +590,11 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, unsigned long flags; raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + irq = __vgic_its_check_cache(dist, db, devid, eventid); + if (irq) + vgic_get_irq_kref(irq); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); return irq; @@ -769,6 +773,7 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; vgic_queue_irq_unlock(kvm, irq, flags); + vgic_put_irq(kvm, irq); return 0; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 89117ba2528a..c15ee1df036a 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -357,31 +357,13 @@ static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - unsigned long flags; - - for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (test_bit(i, &val)) { - /* - * pending_latch is set irrespective of irq type - * (level or edge) to avoid dependency that VM should - * restore irq config before pending info. - */ - irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); - } else { - irq->pending_latch = false; - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } + int ret; - vgic_put_irq(vcpu->kvm, irq); - } + ret = vgic_uaccess_write_spending(vcpu, addr, len, val); + if (ret) + return ret; - return 0; + return vgic_uaccess_write_cpending(vcpu, addr, len, ~val); } /* We want to avoid outer shareable. */ @@ -820,7 +802,7 @@ out_unlock: return ret; } -static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) { struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; @@ -833,6 +815,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) unsigned long c; int ret = 0; + lockdep_assert_held(&kvm->slots_lock); + kvm_for_each_vcpu(c, vcpu, kvm) { ret = vgic_register_redist_iodev(vcpu); if (ret) diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index ff558c05e990..cf76523a2194 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -301,9 +301,8 @@ static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2); } -void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) +static void __set_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, + unsigned long val, bool is_user) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; @@ -312,14 +311,22 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - /* GICD_ISPENDR0 SGI bits are WI */ - if (is_vgic_v2_sgi(vcpu, irq)) { + /* GICD_ISPENDR0 SGI bits are WI when written from the guest. */ + if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { vgic_put_irq(vcpu->kvm, irq); continue; } raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* + * GICv2 SGIs are terribly broken. We can't restore + * the source of the interrupt, so just pick the vcpu + * itself as the source... + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source |= BIT(vcpu->vcpu_id); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { /* HW SGI? Ask the GIC to inject it */ int err; @@ -335,7 +342,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, } irq->pending_latch = true; - if (irq->hw) + if (irq->hw && !is_user) vgic_irq_set_phys_active(irq, true); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); @@ -343,33 +350,18 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, } } +void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __set_pending(vcpu, addr, len, val, false); +} + int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - unsigned long flags; - - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); - irq->pending_latch = true; - - /* - * GICv2 SGIs are terribly broken. We can't restore - * the source of the interrupt, so just pick the vcpu - * itself as the source... - */ - if (is_vgic_v2_sgi(vcpu, irq)) - irq->source |= BIT(vcpu->vcpu_id); - - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); - - vgic_put_irq(vcpu->kvm, irq); - } - + __set_pending(vcpu, addr, len, val, true); return 0; } @@ -394,9 +386,9 @@ static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq) vgic_irq_set_phys_active(irq, false); } -void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) +static void __clear_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val, bool is_user) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; @@ -405,14 +397,22 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - /* GICD_ICPENDR0 SGI bits are WI */ - if (is_vgic_v2_sgi(vcpu, irq)) { + /* GICD_ICPENDR0 SGI bits are WI when written from the guest. */ + if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { vgic_put_irq(vcpu->kvm, irq); continue; } raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* + * More fun with GICv2 SGIs! If we're clearing one of them + * from userspace, which source vcpu to clear? Let's not + * even think of it, and blow the whole set. + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source = 0; + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { /* HW SGI? Ask the GIC to clear its pending bit */ int err; @@ -427,7 +427,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, continue; } - if (irq->hw) + if (irq->hw && !is_user) vgic_hw_irq_cpending(vcpu, irq); else irq->pending_latch = false; @@ -437,33 +437,18 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, } } +void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __clear_pending(vcpu, addr, len, val, false); +} + int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; - unsigned long flags; - - for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); - /* - * More fun with GICv2 SGIs! If we're clearing one of them - * from userspace, which source vcpu to clear? Let's not - * even think of it, and blow the whole set. - */ - if (is_vgic_v2_sgi(vcpu, irq)) - irq->source = 0; - - irq->pending_latch = false; - - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - - vgic_put_irq(vcpu->kvm, irq); - } - + __clear_pending(vcpu, addr, len, val, true); return 0; } diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 339a55194b2c..74a67ad87f29 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -436,6 +436,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, if (ret) goto out; + /* Silently exit if the vLPI is already mapped */ + if (irq->hw) + goto out; + /* * Emit the mapping request. If it fails, the ITS probably * isn't v4 compatible, so let's silently bail out. Holding diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0ab09b0d4440..8d134569d0a1 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -241,6 +241,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); int vgic_v3_save_pending_tables(struct kvm *kvm); int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu); bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 8e2017ba5f1b..924843f1f661 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -29,8 +29,8 @@ bool can_set_direct_map(void) * * KFENCE pool requires page-granular mapping if initialized late. */ - return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || - arm64_kfence_can_set_direct_map(); + return rodata_full || debug_pagealloc_enabled() || + arm64_kfence_can_set_direct_map(); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) @@ -105,8 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages, * If we are manipulating read-only permissions, apply the same * change to the linear mapping of the pages that back this VM area. */ - if (rodata_enabled && - rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || + if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { for (i = 0; i < area->nr_pages; i++) { __change_memory_common((u64)page_address(area->pages[i]), diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index b98c38288a9d..919eceb0b3da 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -37,6 +37,7 @@ HAS_GIC_PRIO_MASKING HAS_GIC_PRIO_RELAXED_SYNC HAS_HCX HAS_LDAPR +HAS_LPA2 HAS_LSE_ATOMICS HAS_MOPS HAS_NESTED_VIRT diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 96cbeeab4eec..5ceaa1d3630e 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1002,6 +1002,27 @@ UnsignedEnum 3:0 BT EndEnum EndSysreg +Sysreg ID_AA64PFR2_EL1 3 0 0 4 2 +Res0 63:36 +UnsignedEnum 35:32 FPMR + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 31:12 +UnsignedEnum 11:8 MTEFAR + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 7:4 MTESTOREONLY + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 3:0 MTEPERM + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 Res0 63:60 UnsignedEnum 59:56 F64MM @@ -1058,7 +1079,11 @@ UnsignedEnum 63 FA64 0b0 NI 0b1 IMP EndEnum -Res0 62:60 +Res0 62:61 +UnsignedEnum 60 LUTv2 + 0b0 NI + 0b1 IMP +EndEnum UnsignedEnum 59:56 SMEver 0b0000 SME 0b0001 SME2 @@ -1086,7 +1111,14 @@ UnsignedEnum 42 F16F16 0b0 NI 0b1 IMP EndEnum -Res0 41:40 +UnsignedEnum 41 F8F16 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 40 F8F32 + 0b0 NI + 0b1 IMP +EndEnum UnsignedEnum 39:36 I8I32 0b0000 NI 0b1111 IMP @@ -1107,7 +1139,49 @@ UnsignedEnum 32 F32F32 0b0 NI 0b1 IMP EndEnum -Res0 31:0 +Res0 31 +UnsignedEnum 30 SF8FMA + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 29 SF8DP4 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 28 SF8DP2 + 0b0 NI + 0b1 IMP +EndEnum +Res0 27:0 +EndSysreg + +Sysreg ID_AA64FPFR0_EL1 3 0 0 4 7 +Res0 63:32 +UnsignedEnum 31 F8CVT + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 30 F8FMA + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 29 F8DP4 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 28 F8DP2 + 0b0 NI + 0b1 IMP +EndEnum +Res0 27:2 +UnsignedEnum 1 F8E4M3 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 0 F8E5M2 + 0b0 NI + 0b1 IMP +EndEnum EndSysreg Sysreg ID_AA64DFR0_EL1 3 0 0 5 0 @@ -1115,7 +1189,10 @@ Enum 63:60 HPMN0 0b0000 UNPREDICTABLE 0b0001 DEF EndEnum -Res0 59:56 +UnsignedEnum 59:56 ExtTrcBuff + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 55:52 BRBE 0b0000 NI 0b0001 IMP @@ -1327,6 +1404,7 @@ UnsignedEnum 11:8 API 0b0011 PAuth2 0b0100 FPAC 0b0101 FPACCOMBINE + 0b0110 PAuth_LR EndEnum UnsignedEnum 7:4 APA 0b0000 NI @@ -1335,6 +1413,7 @@ UnsignedEnum 7:4 APA 0b0011 PAuth2 0b0100 FPAC 0b0101 FPACCOMBINE + 0b0110 PAuth_LR EndEnum UnsignedEnum 3:0 DPB 0b0000 NI @@ -1344,7 +1423,14 @@ EndEnum EndSysreg Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2 -Res0 63:56 +UnsignedEnum 63:60 ATS1A + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 59:56 LUT + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 55:52 CSSC 0b0000 NI 0b0001 IMP @@ -1353,7 +1439,19 @@ UnsignedEnum 51:48 RPRFM 0b0000 NI 0b0001 IMP EndEnum -Res0 47:32 +Res0 47:44 +UnsignedEnum 43:40 PRFMSLC + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 39:36 SYSINSTR_128 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 35:32 SYSREG_128 + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 31:28 CLRBHB 0b0000 NI 0b0001 IMP @@ -1377,6 +1475,7 @@ UnsignedEnum 15:12 APA3 0b0011 PAuth2 0b0100 FPAC 0b0101 FPACCOMBINE + 0b0110 PAuth_LR EndEnum UnsignedEnum 11:8 GPA3 0b0000 NI @@ -1392,6 +1491,23 @@ UnsignedEnum 3:0 WFxT EndEnum EndSysreg +Sysreg ID_AA64ISAR3_EL1 3 0 0 6 3 +Res0 63:12 +UnsignedEnum 11:8 TLBIW + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 7:4 FAMINMAX + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 3:0 CPA + 0b0000 NI + 0b0001 IMP + 0b0010 CPA2 +EndEnum +EndSysreg + Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0 UnsignedEnum 63:60 ECV 0b0000 NI @@ -1680,7 +1796,8 @@ Field 63 TIDCP Field 62 SPINTMASK Field 61 NMI Field 60 EnTP2 -Res0 59:58 +Field 59 TCSO +Field 58 TCSO0 Field 57 EPAN Field 56 EnALS Field 55 EnAS0 @@ -1709,7 +1826,7 @@ EndEnum Field 37 ITFSB Field 36 BT1 Field 35 BT0 -Res0 34 +Field 34 EnFPM Field 33 MSCEn Field 32 CMOW Field 31 EnIA @@ -1747,7 +1864,8 @@ Field 0 M EndSysreg SysregFields CPACR_ELx -Res0 63:29 +Res0 63:30 +Field 29 E0POE Field 28 TTA Res0 27:26 Field 25:24 SMEN @@ -1790,6 +1908,41 @@ Sysreg SMCR_EL1 3 0 1 2 6 Fields SMCR_ELx EndSysreg +SysregFields GCSCR_ELx +Res0 63:10 +Field 9 STREn +Field 8 PUSHMEn +Res0 7 +Field 6 EXLOCKEN +Field 5 RVCHKEN +Res0 4:1 +Field 0 PCRSEL +EndSysregFields + +Sysreg GCSCR_EL1 3 0 2 5 0 +Fields GCSCR_ELx +EndSysreg + +SysregFields GCSPR_ELx +Field 63:3 PTR +Res0 2:0 +EndSysregFields + +Sysreg GCSPR_EL1 3 0 2 5 1 +Fields GCSPR_ELx +EndSysreg + +Sysreg GCSCRE0_EL1 3 0 2 5 2 +Res0 63:11 +Field 10 nTR +Field 9 STREn +Field 8 PUSHMEn +Res0 7:6 +Field 5 RVCHKEN +Res0 4:1 +Field 0 PCRSEL +EndSysreg + Sysreg ALLINT 3 0 4 3 0 Res0 63:14 Field 13 ALLINT @@ -1933,10 +2086,18 @@ Sysreg CONTEXTIDR_EL1 3 0 13 0 1 Fields CONTEXTIDR_ELx EndSysreg +Sysreg RCWSMASK_EL1 3 0 13 0 3 +Field 63:0 RCWSMASK +EndSysreg + Sysreg TPIDR_EL1 3 0 13 0 4 Field 63:0 ThreadID EndSysreg +Sysreg RCWMASK_EL1 3 0 13 0 6 +Field 63:0 RCWMASK +EndSysreg + Sysreg SCXTNUM_EL1 3 0 13 0 7 Field 63:0 SoftwareContextNumber EndSysreg @@ -2020,12 +2181,39 @@ Field 4 DZP Field 3:0 BS EndSysreg +Sysreg GCSPR_EL0 3 3 2 5 1 +Fields GCSPR_ELx +EndSysreg + Sysreg SVCR 3 3 4 2 2 Res0 63:2 Field 1 ZA Field 0 SM EndSysreg +Sysreg FPMR 3 3 4 4 2 +Res0 63:38 +Field 37:32 LSCALE2 +Field 31:24 NSCALE +Res0 23 +Field 22:16 LSCALE +Field 15 OSC +Field 14 OSM +Res0 13:9 +UnsignedEnum 8:6 F8D + 0b000 E5M2 + 0b001 E4M3 +EndEnum +UnsignedEnum 5:3 F8S2 + 0b000 E5M2 + 0b001 E4M3 +EndEnum +UnsignedEnum 2:0 F8S1 + 0b000 E5M2 + 0b001 E4M3 +EndEnum +EndSysreg + SysregFields HFGxTR_EL2 Field 63 nAMAIR2_EL1 Field 62 nMAIR2_EL1 @@ -2102,7 +2290,9 @@ Fields HFGxTR_EL2 EndSysreg Sysreg HFGITR_EL2 3 4 1 1 6 -Res0 63:61 +Res0 63 +Field 62 ATS1E1A +Res0 61 Field 60 COSPRCTX Field 59 nGCSEPP Field 58 nGCSSTR_EL1 @@ -2295,12 +2485,57 @@ Field 1 DBGBVRn_EL1 Field 0 DBGBCRn_EL1 EndSysreg +Sysreg HAFGRTR_EL2 3 4 3 1 6 +Res0 63:50 +Field 49 AMEVTYPER115_EL0 +Field 48 AMEVCNTR115_EL0 +Field 47 AMEVTYPER114_EL0 +Field 46 AMEVCNTR114_EL0 +Field 45 AMEVTYPER113_EL0 +Field 44 AMEVCNTR113_EL0 +Field 43 AMEVTYPER112_EL0 +Field 42 AMEVCNTR112_EL0 +Field 41 AMEVTYPER111_EL0 +Field 40 AMEVCNTR111_EL0 +Field 39 AMEVTYPER110_EL0 +Field 38 AMEVCNTR110_EL0 +Field 37 AMEVTYPER19_EL0 +Field 36 AMEVCNTR19_EL0 +Field 35 AMEVTYPER18_EL0 +Field 34 AMEVCNTR18_EL0 +Field 33 AMEVTYPER17_EL0 +Field 32 AMEVCNTR17_EL0 +Field 31 AMEVTYPER16_EL0 +Field 30 AMEVCNTR16_EL0 +Field 29 AMEVTYPER15_EL0 +Field 28 AMEVCNTR15_EL0 +Field 27 AMEVTYPER14_EL0 +Field 26 AMEVCNTR14_EL0 +Field 25 AMEVTYPER13_EL0 +Field 24 AMEVCNTR13_EL0 +Field 23 AMEVTYPER12_EL0 +Field 22 AMEVCNTR12_EL0 +Field 21 AMEVTYPER11_EL0 +Field 20 AMEVCNTR11_EL0 +Field 19 AMEVTYPER10_EL0 +Field 18 AMEVCNTR10_EL0 +Field 17 AMCNTEN1 +Res0 16:5 +Field 4 AMEVCNTR03_EL0 +Field 3 AMEVCNTR02_EL0 +Field 2 AMEVCNTR01_EL0 +Field 1 AMEVCNTR00_EL0 +Field 0 AMCNTEN0 +EndSysreg + Sysreg ZCR_EL2 3 4 1 2 0 Fields ZCR_ELx EndSysreg Sysreg HCRX_EL2 3 4 1 2 2 -Res0 63:23 +Res0 63:25 +Field 24 PACMEn +Field 23 EnFPM Field 22 GCSEn Field 21 EnIDCP128 Field 20 EnSDERR @@ -2348,6 +2583,14 @@ Sysreg SMCR_EL2 3 4 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg GCSCR_EL2 3 4 2 5 0 +Fields GCSCR_ELx +EndSysreg + +Sysreg GCSPR_EL2 3 4 2 5 1 +Fields GCSPR_ELx +EndSysreg + Sysreg DACR32_EL2 3 4 3 0 0 Res0 63:32 Field 31:30 D15 @@ -2407,6 +2650,14 @@ Sysreg SMCR_EL12 3 5 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg GCSCR_EL12 3 5 2 5 0 +Fields GCSCR_ELx +EndSysreg + +Sysreg GCSPR_EL12 3 5 2 5 1 +Fields GCSPR_ELx +EndSysreg + Sysreg FAR_EL12 3 5 6 0 0 Field 63:0 ADDR EndSysreg @@ -2471,6 +2722,33 @@ Field 1 PIE Field 0 PnCH EndSysreg +SysregFields MAIR2_ELx +Field 63:56 Attr7 +Field 55:48 Attr6 +Field 47:40 Attr5 +Field 39:32 Attr4 +Field 31:24 Attr3 +Field 23:16 Attr2 +Field 15:8 Attr1 +Field 7:0 Attr0 +EndSysregFields + +Sysreg MAIR2_EL1 3 0 10 2 1 +Fields MAIR2_ELx +EndSysreg + +Sysreg MAIR2_EL2 3 4 10 1 1 +Fields MAIR2_ELx +EndSysreg + +Sysreg AMAIR2_EL1 3 0 10 3 1 +Field 63:0 ImpDef +EndSysreg + +Sysreg AMAIR2_EL2 3 4 10 3 1 +Field 63:0 ImpDef +EndSysreg + SysregFields PIRx_ELx Field 63:60 Perm15 Field 59:56 Perm14 @@ -2510,6 +2788,26 @@ Sysreg PIR_EL2 3 4 10 2 3 Fields PIRx_ELx EndSysreg +Sysreg POR_EL0 3 3 10 2 4 +Fields PIRx_ELx +EndSysreg + +Sysreg POR_EL1 3 0 10 2 4 +Fields PIRx_ELx +EndSysreg + +Sysreg POR_EL12 3 5 10 2 4 +Fields PIRx_ELx +EndSysreg + +Sysreg S2POR_EL1 3 0 10 2 5 +Fields PIRx_ELx +EndSysreg + +Sysreg S2PIR_EL2 3 4 10 2 5 +Fields PIRx_ELx +EndSysreg + Sysreg LORSA_EL1 3 0 10 4 0 Res0 63:52 Field 51:16 SA diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 9eeb0c05f3f4..4ba8d67ddb09 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -68,6 +68,7 @@ LDFLAGS_vmlinux += -static -n -nostdlib ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS cflags-y += $(call cc-option,-mexplicit-relocs) KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) +KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) @@ -82,7 +83,7 @@ endif ifeq ($(CONFIG_RELOCATABLE),y) KBUILD_CFLAGS_KERNEL += -fPIE -LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext +LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs) endif cflags-y += $(call cc-option, -mno-check-zero-division) @@ -142,6 +143,8 @@ vdso-install-y += arch/loongarch/vdso/vdso.so.dbg all: $(notdir $(KBUILD_IMAGE)) +vmlinuz.efi: vmlinux.efi + vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@ diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index c9544f358c33..655db7d7a427 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -609,8 +609,7 @@ lu32i.d \reg, 0 lu52i.d \reg, \reg, 0 .pushsection ".la_abs", "aw", %progbits - 768: - .dword 768b-766b + .dword 766b .dword \sym .popsection #endif diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 091897d40b03..91d81f9730ab 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -32,6 +32,6 @@ static inline unsigned long efi_get_kimg_min_align(void) #define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS) -unsigned long kernel_entry_address(void); +unsigned long kernel_entry_address(unsigned long kernel_addr); #endif /* _ASM_LOONGARCH_EFI_H */ diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h index b9a4ab54285c..9b16a3b8e706 100644 --- a/arch/loongarch/include/asm/elf.h +++ b/arch/loongarch/include/asm/elf.h @@ -293,7 +293,7 @@ extern const char *__elf_platform; #define ELF_PLAT_INIT(_r, load_addr) do { \ _r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \ _r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \ - _r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \ + _r->regs[9] = _r->regs[10] /* syscall n */ = _r->regs[12] = 0; \ _r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \ _r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \ _r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \ diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index b108301c2e5a..2d62f7b0d377 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -45,7 +45,10 @@ struct kvm_vcpu_stat { u64 signal_exits; }; +#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0) +#define KVM_MEM_HUGEPAGE_INCAPABLE (1UL << 1) struct kvm_arch_memory_slot { + unsigned long flags; }; struct kvm_context { @@ -92,8 +95,10 @@ enum emulation_result { }; #define KVM_LARCH_FPU (0x1 << 0) -#define KVM_LARCH_SWCSR_LATEST (0x1 << 1) -#define KVM_LARCH_HWCSR_USABLE (0x1 << 2) +#define KVM_LARCH_LSX (0x1 << 1) +#define KVM_LARCH_LASX (0x1 << 2) +#define KVM_LARCH_SWCSR_LATEST (0x1 << 3) +#define KVM_LARCH_HWCSR_USABLE (0x1 << 4) struct kvm_vcpu_arch { /* @@ -175,6 +180,21 @@ static inline void writel_sw_gcsr(struct loongarch_csrs *csr, int reg, unsigned csr->csrs[reg] = val; } +static inline bool kvm_guest_has_fpu(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[2] & CPUCFG2_FP; +} + +static inline bool kvm_guest_has_lsx(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[2] & CPUCFG2_LSX; +} + +static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[2] & CPUCFG2_LASX; +} + /* Debug: dump vcpu state */ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 553cfa2b2b1c..e71ceb88f29e 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -55,7 +55,26 @@ void kvm_save_fpu(struct loongarch_fpu *fpu); void kvm_restore_fpu(struct loongarch_fpu *fpu); void kvm_restore_fcsr(struct loongarch_fpu *fpu); -void kvm_acquire_timer(struct kvm_vcpu *vcpu); +#ifdef CONFIG_CPU_HAS_LSX +int kvm_own_lsx(struct kvm_vcpu *vcpu); +void kvm_save_lsx(struct loongarch_fpu *fpu); +void kvm_restore_lsx(struct loongarch_fpu *fpu); +#else +static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { } +static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { } +static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { } +#endif + +#ifdef CONFIG_CPU_HAS_LASX +int kvm_own_lasx(struct kvm_vcpu *vcpu); +void kvm_save_lasx(struct loongarch_fpu *fpu); +void kvm_restore_lasx(struct loongarch_fpu *fpu); +#else +static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { } +static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { } +static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } +#endif + void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 9b4957cefa8a..46366e783c84 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1098,12 +1098,11 @@ static __always_inline u64 drdtime(void) { - int rID = 0; u64 val = 0; __asm__ __volatile__( - "rdtime.d %0, %1 \n\t" - : "=r"(val), "=r"(rID) + "rdtime.d %0, $zero\n\t" + : "=r"(val) : ); return val; diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index ed5da02b1cf6..9b36ac003f89 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr, \ switch (size) { \ case 4: \ __asm__ __volatile__( \ - "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \ + "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \ : [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr) \ : [val] "r" (val)); \ break; \ case 8: \ __asm__ __volatile__( \ - "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \ + "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \ : [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr) \ : [val] "r" (val)); \ break; \ @@ -63,7 +63,7 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static __always_inline unsigned long __percpu_read(void *ptr, int size) +static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size) { unsigned long ret; @@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size) return ret; } -static __always_inline void __percpu_write(void *ptr, unsigned long val, int size) +static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size) { switch (size) { case 1: @@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz } } -static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) +static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size) { switch (size) { case 1: diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index a0bc159ce8bd..ee52fb1e9963 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len #ifdef CONFIG_RELOCATABLE struct rela_la_abs { - long offset; + long pc; long symvalue; }; diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index c6ad2ee6106c..923d0bd38294 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -79,6 +79,7 @@ struct kvm_fpu { #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) +#define KVM_LOONGARCH_VCPU_CPUCFG 0 struct kvm_debug_exit_arch { }; diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4fcc168f0732..3c808c680370 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index d53ab10f4644..4382e36ae3d4 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -349,6 +349,7 @@ SYM_FUNC_START(_restore_lsx_upper) lsx_restore_all_upper a0 t0 t1 jr ra SYM_FUNC_END(_restore_lsx_upper) +EXPORT_SYMBOL(_restore_lsx_upper) SYM_FUNC_START(_init_lsx_upper) lsx_init_all_upper t1 @@ -384,6 +385,7 @@ SYM_FUNC_START(_restore_lasx_upper) lasx_restore_all_upper a0 t0 t1 jr ra SYM_FUNC_END(_restore_lasx_upper) +EXPORT_SYMBOL(_restore_lasx_upper) SYM_FUNC_START(_init_lasx_upper) lasx_init_all_upper t1 diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 6c3eff9af9fb..1acfa704c8d0 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset) for (p = begin; (void *)p < end; p++) { long v = p->symvalue; uint32_t lu12iw, ori, lu32id, lu52id; - union loongarch_instruction *insn = (void *)p - p->offset; + union loongarch_instruction *insn = (void *)p->pc; lu12iw = (v >> 12) & 0xfffff; ori = v & 0xfff; @@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void) return hash; } +static int __init nokaslr(char *p) +{ + pr_info("KASLR is disabled.\n"); + + return 0; /* Print a notice and silence the boot warning */ +} +early_param("nokaslr", nokaslr); + static inline __init bool kaslr_disabled(void) { char *str; diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 92270f14db94..f623feb2129f 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -32,7 +32,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, } for (unwind_start(&state, task, regs); - !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { + !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || !consume_entry(cookie, addr)) break; diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 3064af94db9c..e7015f7b70e3 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -58,14 +58,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt) return 0; } -static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) +static int constant_set_state_periodic(struct clock_event_device *evt) { + unsigned long period; unsigned long timer_config; raw_spin_lock(&state_lock); - timer_config = csr_read64(LOONGARCH_CSR_TCFG); - timer_config &= ~CSR_TCFG_EN; + period = const_clock_freq / HZ; + timer_config = period & CSR_TCFG_VAL; + timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN); csr_write64(timer_config, LOONGARCH_CSR_TCFG); raw_spin_unlock(&state_lock); @@ -73,16 +75,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) return 0; } -static int constant_set_state_periodic(struct clock_event_device *evt) +static int constant_set_state_shutdown(struct clock_event_device *evt) { - unsigned long period; unsigned long timer_config; raw_spin_lock(&state_lock); - period = const_clock_freq / HZ; - timer_config = period & CSR_TCFG_VAL; - timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN); + timer_config = csr_read64(LOONGARCH_CSR_TCFG); + timer_config &= ~CSR_TCFG_EN; csr_write64(timer_config, LOONGARCH_CSR_TCFG); raw_spin_unlock(&state_lock); @@ -90,11 +90,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt) return 0; } -static int constant_set_state_shutdown(struct clock_event_device *evt) -{ - return 0; -} - static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned long timer_config; @@ -161,7 +156,7 @@ int constant_clockevent_init(void) cd->rating = 320; cd->cpumask = cpumask_of(cpu); cd->set_state_oneshot = constant_set_state_oneshot; - cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped; + cd->set_state_oneshot_stopped = constant_set_state_shutdown; cd->set_state_periodic = constant_set_state_periodic; cd->set_state_shutdown = constant_set_state_shutdown; cd->set_next_event = constant_timer_next_event; diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c index ba324ba76fa1..a463d6961344 100644 --- a/arch/loongarch/kernel/unwind.c +++ b/arch/loongarch/kernel/unwind.c @@ -28,6 +28,5 @@ bool default_next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); - state->error = true; return false; } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 55afc27320e1..929ae240280a 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -227,7 +227,7 @@ static bool next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); out: - state->error = true; + state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index f22bae89b07d..61f7e33b1f95 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -22,14 +22,13 @@ config KVM depends on AS_HAS_LVZ_EXTENSION depends on HAVE_KVM select HAVE_KVM_DIRTY_RING_ACQ_REL - select HAVE_KVM_EVENTFD select HAVE_KVM_VCPU_ASYNC_IOCTL + select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO select KVM_XFER_TO_GUEST_WORK - select PREEMPT_NOTIFIERS help Support hosting virtualized guest machines using hardware virtualization extensions. You will need diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index ce8de3fa472c..ed1d89d53e2e 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -200,17 +200,8 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu) ++vcpu->stat.idle_exits; trace_kvm_exit_idle(vcpu, KVM_TRACE_EXIT_IDLE); - if (!kvm_arch_vcpu_runnable(vcpu)) { - /* - * Switch to the software timer before halt-polling/blocking as - * the guest's timer may be a break event for the vCPU, and the - * hypervisor timer runs only when the CPU is in guest mode. - * Switch before halt-polling so that KVM recognizes an expired - * timer before blocking. - */ - kvm_save_timer(vcpu); - kvm_vcpu_block(vcpu); - } + if (!kvm_arch_vcpu_runnable(vcpu)) + kvm_vcpu_halt(vcpu); return EMULATE_DONE; } @@ -643,6 +634,11 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; + if (!kvm_guest_has_fpu(&vcpu->arch)) { + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + return RESUME_GUEST; + } + /* * If guest FPU not present, the FPU operation should have been * treated as a reserved instruction! @@ -660,6 +656,36 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu) } /* + * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use LSX when it is disabled in the root + * context. + */ +static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu) +{ + if (kvm_own_lsx(vcpu)) + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + + return RESUME_GUEST; +} + +/* + * kvm_handle_lasx_disabled() - Guest used LASX while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use LASX when it is disabled in the root + * context. + */ +static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu) +{ + if (kvm_own_lasx(vcpu)) + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + + return RESUME_GUEST; +} + +/* * LoongArch KVM callback handling for unimplemented guest exiting */ static int kvm_fault_ni(struct kvm_vcpu *vcpu) @@ -687,6 +713,8 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = { [EXCCODE_TLBS] = kvm_handle_write_fault, [EXCCODE_TLBM] = kvm_handle_write_fault, [EXCCODE_FPDIS] = kvm_handle_fpu_disabled, + [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled, + [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled, [EXCCODE_GSPR] = kvm_handle_gspr, }; diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 1c1d5199500e..86a2f2d0cb27 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -287,7 +287,6 @@ int kvm_arch_hardware_enable(void) if (env & CSR_GCFG_MATC_ROOT) gcfg |= CSR_GCFG_MATC_ROOT; - gcfg |= CSR_GCFG_TIT; write_csr_gcfg(gcfg); kvm_flush_tlb_all(); diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 80480df5f550..915f17527893 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -13,6 +13,16 @@ #include <asm/tlb.h> #include <asm/kvm_mmu.h> +static inline bool kvm_hugepage_capable(struct kvm_memory_slot *slot) +{ + return slot->arch.flags & KVM_MEM_HUGEPAGE_CAPABLE; +} + +static inline bool kvm_hugepage_incapable(struct kvm_memory_slot *slot) +{ + return slot->arch.flags & KVM_MEM_HUGEPAGE_INCAPABLE; +} + static inline void kvm_ptw_prepare(struct kvm *kvm, kvm_ptw_ctx *ctx) { ctx->level = kvm->arch.root_level; @@ -365,6 +375,69 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx); } +int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, + struct kvm_memory_slot *new, enum kvm_mr_change change) +{ + gpa_t gpa_start; + hva_t hva_start; + size_t size, gpa_offset, hva_offset; + + if ((change != KVM_MR_MOVE) && (change != KVM_MR_CREATE)) + return 0; + /* + * Prevent userspace from creating a memory region outside of the + * VM GPA address space + */ + if ((new->base_gfn + new->npages) > (kvm->arch.gpa_size >> PAGE_SHIFT)) + return -ENOMEM; + + new->arch.flags = 0; + size = new->npages * PAGE_SIZE; + gpa_start = new->base_gfn << PAGE_SHIFT; + hva_start = new->userspace_addr; + if (IS_ALIGNED(size, PMD_SIZE) && IS_ALIGNED(gpa_start, PMD_SIZE) + && IS_ALIGNED(hva_start, PMD_SIZE)) + new->arch.flags |= KVM_MEM_HUGEPAGE_CAPABLE; + else { + /* + * Pages belonging to memslots that don't have the same + * alignment within a PMD for userspace and GPA cannot be + * mapped with PMD entries, because we'll end up mapping + * the wrong pages. + * + * Consider a layout like the following: + * + * memslot->userspace_addr: + * +-----+--------------------+--------------------+---+ + * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| + * +-----+--------------------+--------------------+---+ + * + * memslot->base_gfn << PAGE_SIZE: + * +---+--------------------+--------------------+-----+ + * |abc|def Stage-2 block | Stage-2 block |tvxyz| + * +---+--------------------+--------------------+-----+ + * + * If we create those stage-2 blocks, we'll end up with this + * incorrect mapping: + * d -> f + * e -> g + * f -> h + */ + gpa_offset = gpa_start & (PMD_SIZE - 1); + hva_offset = hva_start & (PMD_SIZE - 1); + if (gpa_offset != hva_offset) { + new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE; + } else { + if (gpa_offset == 0) + gpa_offset = PMD_SIZE; + if ((size + gpa_offset) < (PMD_SIZE * 2)) + new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE; + } + } + + return 0; +} + void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, @@ -562,47 +635,23 @@ out: } static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, - unsigned long hva, unsigned long map_size, bool write) + unsigned long hva, bool write) { - size_t size; - gpa_t gpa_start; - hva_t uaddr_start, uaddr_end; + hva_t start, end; /* Disable dirty logging on HugePages */ if (kvm_slot_dirty_track_enabled(memslot) && write) return false; - size = memslot->npages * PAGE_SIZE; - gpa_start = memslot->base_gfn << PAGE_SHIFT; - uaddr_start = memslot->userspace_addr; - uaddr_end = uaddr_start + size; + if (kvm_hugepage_capable(memslot)) + return true; - /* - * Pages belonging to memslots that don't have the same alignment - * within a PMD for userspace and GPA cannot be mapped with stage-2 - * PMD entries, because we'll end up mapping the wrong pages. - * - * Consider a layout like the following: - * - * memslot->userspace_addr: - * +-----+--------------------+--------------------+---+ - * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| - * +-----+--------------------+--------------------+---+ - * - * memslot->base_gfn << PAGE_SIZE: - * +---+--------------------+--------------------+-----+ - * |abc|def Stage-2 block | Stage-2 block |tvxyz| - * +---+--------------------+--------------------+-----+ - * - * If we create those stage-2 blocks, we'll end up with this incorrect - * mapping: - * d -> f - * e -> g - * f -> h - */ - if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1))) + if (kvm_hugepage_incapable(memslot)) return false; + start = memslot->userspace_addr; + end = start + memslot->npages * PAGE_SIZE; + /* * Next, let's make sure we're not trying to map anything not covered * by the memslot. This means we have to prohibit block size mappings @@ -615,8 +664,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, * userspace_addr or the base_gfn, as both are equally aligned (per * the check above) and equally sized. */ - return (hva & ~(map_size - 1)) >= uaddr_start && - (hva & ~(map_size - 1)) + map_size <= uaddr_end; + return (hva >= ALIGN(start, PMD_SIZE)) && (hva < ALIGN_DOWN(end, PMD_SIZE)); } /* @@ -842,7 +890,7 @@ retry: /* Disable dirty logging on HugePages */ level = 0; - if (!fault_supports_huge_mapping(memslot, hva, PMD_SIZE, write)) { + if (!fault_supports_huge_mapping(memslot, hva, write)) { level = 0; } else { level = host_pfn_mapping_level(kvm, gfn, memslot); @@ -901,12 +949,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { } -int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, - struct kvm_memory_slot *new, enum kvm_mr_change change) -{ - return 0; -} - void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, const struct kvm_memory_slot *memslot) { diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 0ed9040307b7..ba976509bfe8 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -245,6 +245,37 @@ SYM_FUNC_START(kvm_restore_fpu) jr ra SYM_FUNC_END(kvm_restore_fpu) +#ifdef CONFIG_CPU_HAS_LSX +SYM_FUNC_START(kvm_save_lsx) + fpu_save_csr a0 t1 + fpu_save_cc a0 t1 t2 + lsx_save_data a0 t1 + jr ra +SYM_FUNC_END(kvm_save_lsx) + +SYM_FUNC_START(kvm_restore_lsx) + lsx_restore_data a0 t1 + fpu_restore_cc a0 t1 t2 + fpu_restore_csr a0 t1 t2 + jr ra +SYM_FUNC_END(kvm_restore_lsx) +#endif + +#ifdef CONFIG_CPU_HAS_LASX +SYM_FUNC_START(kvm_save_lasx) + fpu_save_csr a0 t1 + fpu_save_cc a0 t1 t2 + lasx_save_data a0 t1 + jr ra +SYM_FUNC_END(kvm_save_lasx) + +SYM_FUNC_START(kvm_restore_lasx) + lasx_restore_data a0 t1 + fpu_restore_cc a0 t1 t2 + fpu_restore_csr a0 t1 t2 + jr ra +SYM_FUNC_END(kvm_restore_lasx) +#endif .section ".rodata" SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 284bf553fefe..111328f60872 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -65,40 +65,23 @@ void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz) } /* - * Restore hard timer state and enable guest to access timer registers - * without trap, should be called with irq disabled - */ -void kvm_acquire_timer(struct kvm_vcpu *vcpu) -{ - unsigned long cfg; - - cfg = read_csr_gcfg(); - if (!(cfg & CSR_GCFG_TIT)) - return; - - /* Enable guest access to hard timer */ - write_csr_gcfg(cfg & ~CSR_GCFG_TIT); - - /* - * Freeze the soft-timer and sync the guest stable timer with it. We do - * this with interrupts disabled to avoid latency. - */ - hrtimer_cancel(&vcpu->arch.swtimer); -} - -/* * Restore soft timer state from saved context. */ void kvm_restore_timer(struct kvm_vcpu *vcpu) { - unsigned long cfg, delta, period; + unsigned long cfg, estat; + unsigned long ticks, delta, period; ktime_t expire, now; struct loongarch_csrs *csr = vcpu->arch.csr; /* * Set guest stable timer cfg csr + * Disable timer before restore estat CSR register, avoid to + * get invalid timer interrupt for old timer cfg */ cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG); + + write_gcsr_timercfg(0); kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ESTAT); kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TCFG); if (!(cfg & CSR_TCFG_EN)) { @@ -108,23 +91,55 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) } /* + * Freeze the soft-timer and sync the guest stable timer with it. + */ + hrtimer_cancel(&vcpu->arch.swtimer); + + /* + * From LoongArch Reference Manual Volume 1 Chapter 7.6.2 + * If oneshot timer is fired, CSR TVAL will be -1, there are two + * conditions: + * 1) timer is fired during exiting to host + * 2) timer is fired and vm is doing timer irq, and then exiting to + * host. Host should not inject timer irq to avoid spurious + * timer interrupt again + */ + ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL); + estat = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); + if (!(cfg & CSR_TCFG_PERIOD) && (ticks > cfg)) { + /* + * Writing 0 to LOONGARCH_CSR_TVAL will inject timer irq + * and set CSR TVAL with -1 + */ + write_gcsr_timertick(0); + + /* + * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear + * timer interrupt, and CSR TVAL keeps unchanged with -1, it + * avoids spurious timer interrupt + */ + if (!(estat & CPU_TIMER)) + gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + return; + } + + /* * Set remainder tick value if not expired */ + delta = 0; now = ktime_get(); expire = vcpu->arch.expire; if (ktime_before(now, expire)) delta = ktime_to_tick(vcpu, ktime_sub(expire, now)); - else { - if (cfg & CSR_TCFG_PERIOD) { - period = cfg & CSR_TCFG_VAL; - delta = ktime_to_tick(vcpu, ktime_sub(now, expire)); - delta = period - (delta % period); - } else - delta = 0; + else if (cfg & CSR_TCFG_PERIOD) { + period = cfg & CSR_TCFG_VAL; + delta = ktime_to_tick(vcpu, ktime_sub(now, expire)); + delta = period - (delta % period); + /* * Inject timer here though sw timer should inject timer * interrupt async already, since sw timer may be cancelled - * during injecting intr async in function kvm_acquire_timer + * during injecting intr async */ kvm_queue_irq(vcpu, INT_TI); } @@ -139,27 +154,41 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) */ static void _kvm_save_timer(struct kvm_vcpu *vcpu) { - unsigned long ticks, delta; + unsigned long ticks, delta, cfg; ktime_t expire; struct loongarch_csrs *csr = vcpu->arch.csr; + cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG); ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL); - delta = tick_to_ns(vcpu, ticks); - expire = ktime_add_ns(ktime_get(), delta); - vcpu->arch.expire = expire; - if (ticks) { + + /* + * From LoongArch Reference Manual Volume 1 Chapter 7.6.2 + * If period timer is fired, CSR TVAL will be reloaded from CSR TCFG + * If oneshot timer is fired, CSR TVAL will be -1 + * Here judge one-shot timer fired by checking whether TVAL is larger + * than TCFG + */ + if (ticks < cfg) { + delta = tick_to_ns(vcpu, ticks); + expire = ktime_add_ns(ktime_get(), delta); + vcpu->arch.expire = expire; + /* - * Update hrtimer to use new timeout * HRTIMER_MODE_PINNED is suggested since vcpu may run in * the same physical cpu in next time */ - hrtimer_cancel(&vcpu->arch.swtimer); hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); - } else + } else if (vcpu->stat.generic.blocking) { /* - * Inject timer interrupt so that hall polling can dectect and exit + * Inject timer interrupt so that halt polling can dectect and exit. + * VCPU is scheduled out already and sleeps in rcuwait queue and + * will not poll pending events again. kvm_queue_irq() is not enough, + * hrtimer swtimer should be used here. */ - kvm_queue_irq(vcpu, INT_TI); + expire = ktime_add_ns(ktime_get(), 10); + vcpu->arch.expire = expire; + hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); + } } /* @@ -168,21 +197,15 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) */ void kvm_save_timer(struct kvm_vcpu *vcpu) { - unsigned long cfg; struct loongarch_csrs *csr = vcpu->arch.csr; preempt_disable(); - cfg = read_csr_gcfg(); - if (!(cfg & CSR_GCFG_TIT)) { - /* Disable guest use of hard timer */ - write_csr_gcfg(cfg | CSR_GCFG_TIT); - - /* Save hard timer state */ - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL); - if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN) - _kvm_save_timer(vcpu); - } + + /* Save hard timer state */ + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL); + if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN) + _kvm_save_timer(vcpu); /* Save timer-related state to vCPU context */ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT); diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index a1e35d655418..c2484ad4cffa 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -102,6 +102,8 @@ TRACE_EVENT(kvm_exit_gspr, #define KVM_TRACE_AUX_DISCARD 4 #define KVM_TRACE_AUX_FPU 1 +#define KVM_TRACE_AUX_LSX 2 +#define KVM_TRACE_AUX_LASX 3 #define kvm_trace_symbol_aux_op \ { KVM_TRACE_AUX_SAVE, "save" }, \ @@ -111,7 +113,9 @@ TRACE_EVENT(kvm_exit_gspr, { KVM_TRACE_AUX_DISCARD, "discard" } #define kvm_trace_symbol_aux_state \ - { KVM_TRACE_AUX_FPU, "FPU" } + { KVM_TRACE_AUX_FPU, "FPU" }, \ + { KVM_TRACE_AUX_LSX, "LSX" }, \ + { KVM_TRACE_AUX_LASX, "LASX" } TRACE_EVENT(kvm_aux, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 73d0c2b9c1a5..27701991886d 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -95,7 +95,6 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) * check vmid before vcpu enter guest */ local_irq_disable(); - kvm_acquire_timer(vcpu); kvm_deliver_intr(vcpu); kvm_deliver_exception(vcpu); /* Make sure the vcpu mode has been written */ @@ -187,8 +186,15 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvm_pending_timer(vcpu) || + int ret; + + /* Protect from TOD sync and vcpu_load/put() */ + preempt_disable(); + ret = kvm_pending_timer(vcpu) || kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT) & (1 << INT_TI); + preempt_enable(); + + return ret; } int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) @@ -244,23 +250,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -EINVAL; } -/** - * kvm_migrate_count() - Migrate timer. - * @vcpu: Virtual CPU. - * - * Migrate hrtimer to the current CPU by cancelling and restarting it - * if the hrtimer is active. - * - * Must be called when the vCPU is migrated to a different CPU, so that - * the timer can interrupt the guest at the new CPU, and the timer irq can - * be delivered to the vCPU. - */ -static void kvm_migrate_count(struct kvm_vcpu *vcpu) -{ - if (hrtimer_cancel(&vcpu->arch.swtimer)) - hrtimer_restart(&vcpu->arch.swtimer); -} - static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) { unsigned long gintc; @@ -309,6 +298,76 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) return ret; } +static int _kvm_get_cpucfg(int id, u64 *v) +{ + int ret = 0; + + if (id < 0 && id >= KVM_MAX_CPUCFG_REGS) + return -EINVAL; + + switch (id) { + case 2: + /* Return CPUCFG2 features which have been supported by KVM */ + *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP | + CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV | + CPUCFG2_LAM; + /* + * If LSX is supported by CPU, it is also supported by KVM, + * as we implement it. + */ + if (cpu_has_lsx) + *v |= CPUCFG2_LSX; + /* + * if LASX is supported by CPU, it is also supported by KVM, + * as we implement it. + */ + if (cpu_has_lasx) + *v |= CPUCFG2_LASX; + + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int kvm_check_cpucfg(int id, u64 val) +{ + u64 mask; + int ret = 0; + + if (id < 0 && id >= KVM_MAX_CPUCFG_REGS) + return -EINVAL; + + if (_kvm_get_cpucfg(id, &mask)) + return ret; + + switch (id) { + case 2: + /* CPUCFG2 features checking */ + if (val & ~mask) + /* The unsupported features should not be set */ + ret = -EINVAL; + else if (!(val & CPUCFG2_LLFTP)) + /* The LLFTP must be set, as guest must has a constant timer */ + ret = -EINVAL; + else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP))) + /* Single and double float point must both be set when enable FP */ + ret = -EINVAL; + else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP)) + /* FP should be set when enable LSX */ + ret = -EINVAL; + else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX)) + /* LSX, FP should be set when enable LASX, and FP has been checked before. */ + ret = -EINVAL; + break; + default: + break; + } + return ret; +} + static int kvm_get_one_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, u64 *v) { @@ -378,10 +437,10 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, break; case KVM_REG_LOONGARCH_CPUCFG: id = KVM_GET_IOC_CPUCFG_IDX(reg->id); - if (id >= 0 && id < KVM_MAX_CPUCFG_REGS) - vcpu->arch.cpucfg[id] = (u32)v; - else - ret = -EINVAL; + ret = kvm_check_cpucfg(id, v); + if (ret) + break; + vcpu->arch.cpucfg[id] = (u32)v; break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { @@ -471,10 +530,94 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return -EINVAL; } +static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case 2: + return 0; + default: + return -ENXIO; + } + + return -ENXIO; +} + +static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + case KVM_LOONGARCH_VCPU_CPUCFG: + ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr); + break; + default: + break; + } + + return ret; +} + +static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = 0; + uint64_t val; + uint64_t __user *uaddr = (uint64_t __user *)attr->addr; + + ret = _kvm_get_cpucfg(attr->attr, &val); + if (ret) + return ret; + + put_user(val, uaddr); + + return ret; +} + +static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + case KVM_LOONGARCH_VCPU_CPUCFG: + ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr); + break; + default: + break; + } + + return ret; +} + +static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + case KVM_LOONGARCH_VCPU_CPUCFG: + ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr); + break; + default: + break; + } + + return ret; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { long r; + struct kvm_device_attr attr; void __user *argp = (void __user *)arg; struct kvm_vcpu *vcpu = filp->private_data; @@ -514,6 +657,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } + case KVM_HAS_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_loongarch_vcpu_has_attr(vcpu, &attr); + break; + } + case KVM_GET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_loongarch_vcpu_get_attr(vcpu, &attr); + break; + } + case KVM_SET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_loongarch_vcpu_set_attr(vcpu, &attr); + break; + } default: r = -ENOIOCTLCMD; break; @@ -561,12 +725,96 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu) preempt_enable(); } +#ifdef CONFIG_CPU_HAS_LSX +/* Enable LSX and restore context */ +int kvm_own_lsx(struct kvm_vcpu *vcpu) +{ + if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + + /* Enable LSX for guest */ + set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN); + switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { + case KVM_LARCH_FPU: + /* + * Guest FPU state already loaded, + * only restore upper LSX state + */ + _restore_lsx_upper(&vcpu->arch.fpu); + break; + default: + /* Neither FP or LSX already active, + * restore full LSX state + */ + kvm_restore_lsx(&vcpu->arch.fpu); + break; + } + + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX); + vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU; + preempt_enable(); + + return 0; +} +#endif + +#ifdef CONFIG_CPU_HAS_LASX +/* Enable LASX and restore context */ +int kvm_own_lasx(struct kvm_vcpu *vcpu) +{ + if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch) || !kvm_guest_has_lasx(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + + set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); + switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) { + case KVM_LARCH_LSX: + case KVM_LARCH_LSX | KVM_LARCH_FPU: + /* Guest LSX state already loaded, only restore upper LASX state */ + _restore_lasx_upper(&vcpu->arch.fpu); + break; + case KVM_LARCH_FPU: + /* Guest FP state already loaded, only restore upper LSX & LASX state */ + _restore_lsx_upper(&vcpu->arch.fpu); + _restore_lasx_upper(&vcpu->arch.fpu); + break; + default: + /* Neither FP or LSX already active, restore full LASX state */ + kvm_restore_lasx(&vcpu->arch.fpu); + break; + } + + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX); + vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU; + preempt_enable(); + + return 0; +} +#endif + /* Save context and disable FPU */ void kvm_lose_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); - if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { + if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) { + kvm_save_lasx(&vcpu->arch.fpu); + vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LASX); + + /* Disable LASX & LSX & FPU */ + clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); + } else if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) { + kvm_save_lsx(&vcpu->arch.fpu); + vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LSX); + + /* Disable LSX & FPU */ + clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN); + } else if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { kvm_save_fpu(&vcpu->arch.fpu); vcpu->arch.aux_inuse &= ~KVM_LARCH_FPU; trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU); @@ -789,17 +1037,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) unsigned long flags; local_irq_save(flags); - if (vcpu->arch.last_sched_cpu != cpu) { - kvm_debug("[%d->%d]KVM vCPU[%d] switch\n", - vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); - /* - * Migrate the timer interrupt to the current CPU so that it - * always interrupts the guest and synchronously triggers a - * guest timer interrupt. - */ - kvm_migrate_count(vcpu); - } - /* Restore guest state to registers */ _kvm_vcpu_load(vcpu, cpu); local_irq_restore(flags); diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 71d0539e2d0b..2aae72e63871 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr) { return pfn_to_page(virt_to_pfn(kaddr)); } -EXPORT_SYMBOL_GPL(dmw_virt_to_page); +EXPORT_SYMBOL(dmw_virt_to_page); struct page *tlb_virt_to_page(unsigned long kaddr) { return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); } -EXPORT_SYMBOL_GPL(tlb_virt_to_page); +EXPORT_SYMBOL(tlb_virt_to_page); pgd_t *pgd_alloc(struct mm_struct *mm) { diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 169ff8b3915e..4fcd6cd6da23 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -480,10 +480,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case 8: move_reg(ctx, t1, src); emit_insn(ctx, extwb, dst, t1); + emit_zext_32(ctx, dst, is32); break; case 16: move_reg(ctx, t1, src); emit_insn(ctx, extwh, dst, t1); + emit_zext_32(ctx, dst, is32); break; case 32: emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO); @@ -772,8 +774,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext break; case 32: emit_insn(ctx, revb2w, dst, dst); - /* zero-extend 32 bits into 64 bits */ - emit_zext_32(ctx, dst, is32); + /* clear the upper 32 bits */ + emit_zext_32(ctx, dst, true); break; case 64: emit_insn(ctx, revbd, dst, dst); @@ -911,8 +913,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* function return */ case BPF_JMP | BPF_EXIT: - emit_sext_32(ctx, regmap[BPF_REG_0], true); - if (i == ctx->prog->len - 1) break; @@ -988,14 +988,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; case BPF_DW: - if (is_signed_imm12(off)) { - emit_insn(ctx, ldd, dst, src, off); - } else if (is_signed_imm14(off)) { - emit_insn(ctx, ldptrd, dst, src, off); - } else { - move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxd, dst, src, t1); - } + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxd, dst, src, t1); break; } diff --git a/arch/m68k/include/asm/kexec.h b/arch/m68k/include/asm/kexec.h index f5a8b2defa4b..3b0b64f0a353 100644 --- a/arch/m68k/include/asm/kexec.h +++ b/arch/m68k/include/asm/kexec.h @@ -2,7 +2,7 @@ #ifndef _ASM_M68K_KEXEC_H #define _ASM_M68K_KEXEC_H -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) @@ -25,6 +25,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* _ASM_M68K_KEXEC_H */ diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index 01fb69a5095f..f335bf3268a1 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_PCI) += pcibios.o obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o obj-$(CONFIG_UBOOT) += uboot.o diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 76db82542519..797ae590ebdb 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -460,6 +460,7 @@ config MACH_LOONGSON2EF config MACH_LOONGSON64 bool "Loongson 64-bit family of machines" + select ARCH_DMA_DEFAULT_COHERENT select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO @@ -1251,6 +1252,7 @@ config CPU_LOONGSON64 select CPU_SUPPORTS_MSA select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT select CPU_MIPSR2_IRQ_VI + select DMA_NONCOHERENT select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_ASID_BITS_VARIABLE diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index f878f47e4501..ee3e2153dd13 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -130,8 +130,7 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass0c0320", - "pciclass0c03", - "loongson, pci-gmac"; + "pciclass0c03"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_LOW>, diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi index 7c69e8245c2f..cce9428afc41 100644 --- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi +++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi @@ -193,8 +193,7 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass020000", - "pciclass0200", - "loongson, pci-gmac"; + "pciclass0200"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 33c09688210f..08ea2cde1eb5 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -422,7 +422,7 @@ static const struct plat_smp_ops octeon_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -502,7 +502,7 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h index d6d5fa5cc31d..69e579e41e66 100644 --- a/arch/mips/include/asm/kexec.h +++ b/arch/mips/include/asm/kexec.h @@ -31,7 +31,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs, prepare_frametrace(newregs); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE struct kimage; extern unsigned long kexec_args[4]; extern int (*_machine_kexec_prepare)(struct kimage *); diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 035b1a69e2d0..e007edd6b60a 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -14,7 +14,11 @@ #define ADAPTER_ROM 8 #define ACPI_TABLE 9 #define SMBIOS_TABLE 10 -#define MAX_MEMORY_TYPE 11 +#define UMA_VIDEO_RAM 11 +#define VUMA_VIDEO_RAM 12 +#define MAX_MEMORY_TYPE 13 + +#define MEM_SIZE_IS_IN_BYTES (1 << 31) #define LOONGSON3_BOOT_MEM_MAP_MAX 128 struct efi_memory_map_loongson { @@ -117,7 +121,8 @@ struct irq_source_routing_table { u64 pci_io_start_addr; u64 pci_io_end_addr; u64 pci_config_addr; - u32 dma_mask_bits; + u16 dma_mask_bits; + u16 dma_noncoherent; } __packed; struct interface_info { diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 5719ff49eff1..0c59e168f800 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -35,7 +35,7 @@ struct plat_smp_ops { void (*cpu_die)(unsigned int cpu); void (*cleanup_dead_cpu)(unsigned cpu); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void (*kexec_nonboot_cpu)(void); #endif }; diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index a40d8c0e4b87..901bc61fa7ae 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -93,7 +93,7 @@ static inline void __cpu_die(unsigned int cpu) extern void __noreturn play_dead(void); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static inline void kexec_nonboot_cpu(void) { extern const struct plat_smp_ops *mp_ops; /* private */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 853a43ee4b44..ecf3278a32f7 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -90,7 +90,7 @@ obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK_8250) += early_printk_8250.o diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 5387ed0a5186..b630604c577f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -121,6 +121,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK; + + /* + * New tasks lose permission to use the fpu. This accelerates context + * switching for most programs since they don't use the fpu. + */ + clear_tsk_thread_flag(p, TIF_USEDFPU); + clear_tsk_thread_flag(p, TIF_USEDMSA); + clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); + +#ifdef CONFIG_MIPS_MT_FPAFF + clear_tsk_thread_flag(p, TIF_FPUBOUND); +#endif /* CONFIG_MIPS_MT_FPAFF */ + if (unlikely(args->fn)) { /* kernel thread */ unsigned long status = p->thread.cp0_status; @@ -149,20 +162,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; - /* - * New tasks lose permission to use the fpu. This accelerates context - * switching for most programs since they don't use the fpu. - */ childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); - clear_tsk_thread_flag(p, TIF_USEDFPU); - clear_tsk_thread_flag(p, TIF_USEDMSA); - clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); - -#ifdef CONFIG_MIPS_MT_FPAFF - clear_tsk_thread_flag(p, TIF_FPUBOUND); -#endif /* CONFIG_MIPS_MT_FPAFF */ - #ifdef CONFIG_MIPS_FP_SUPPORT atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE); #endif diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index c074ecce3fbf..b3dbf9ecb0d6 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -434,7 +434,7 @@ const struct plat_smp_ops bmips43xx_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -451,7 +451,7 @@ const struct plat_smp_ops bmips5000_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index dd55d59b88db..f6c37d407f36 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -392,7 +392,7 @@ static void cps_smp_finish(void) local_irq_enable(); } -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) enum cpu_death { CPU_DEATH_HALT, @@ -429,7 +429,7 @@ static void cps_shutdown_this_cpu(enum cpu_death death) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void cps_kexec_nonboot_cpu(void) { @@ -439,9 +439,9 @@ static void cps_kexec_nonboot_cpu(void) cps_shutdown_this_cpu(CPU_DEATH_POWER); } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ -#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */ +#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC_CORE */ #ifdef CONFIG_HOTPLUG_CPU @@ -610,7 +610,7 @@ static const struct plat_smp_ops cps_smp_ops = { .cpu_die = cps_cpu_die, .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, #endif }; diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8fbef537fb88..82e2e051b416 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -351,10 +351,11 @@ early_initcall(mips_smp_ipi_init); */ asmlinkage void start_secondary(void) { - unsigned int cpu; + unsigned int cpu = raw_smp_processor_id(); cpu_probe(); per_cpu_trap_init(false); + rcutree_report_cpu_starting(cpu); mips_clockevent_init(); mp_ops->init_secondary(); cpu_report(); @@ -366,7 +367,6 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); - cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; set_cpu_sibling_map(cpu); diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index c04987d2ed2e..18e7a17d5115 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -20,13 +20,11 @@ config KVM depends on HAVE_KVM depends on MIPS_FP_SUPPORT select EXPORT_UASM - select PREEMPT_NOTIFIERS + select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT - select HAVE_KVM_EVENTFD select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_MMIO select KVM_GENERIC_MMU_NOTIFIER - select INTERVAL_TREE select KVM_GENERIC_HARDWARE_ENABLING help Support for hosting Guest kernels. diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index c961e2999f15..ef3750a6ffac 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c @@ -13,6 +13,8 @@ * Copyright (C) 2009 Lemote Inc. * Author: Wu Zhangjin, wuzhangjin@gmail.com */ + +#include <linux/dma-map-ops.h> #include <linux/export.h> #include <linux/pci_ids.h> #include <asm/bootinfo.h> @@ -147,8 +149,14 @@ void __init prom_lefi_init_env(void) loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; if (loongson_sysconf.dma_mask_bits < 32 || - loongson_sysconf.dma_mask_bits > 64) + loongson_sysconf.dma_mask_bits > 64) { loongson_sysconf.dma_mask_bits = 32; + dma_default_coherent = true; + } else { + dma_default_coherent = !eirq_source->dma_noncoherent; + } + + pr_info("Firmware: Coherent DMA: %s\n", dma_default_coherent ? "on" : "off"); loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index ee8de1735b7c..f25caa6aa9d3 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -49,8 +49,7 @@ void virtual_early_config(void) void __init szmem(unsigned int node) { u32 i, mem_type; - static unsigned long num_physpages; - u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size; + phys_addr_t node_id, mem_start, mem_size; /* Otherwise come from DTB */ if (loongson_sysconf.fw_interface != LOONGSON_LEFI) @@ -64,30 +63,46 @@ void __init szmem(unsigned int node) mem_type = loongson_memmap->map[i].mem_type; mem_size = loongson_memmap->map[i].mem_size; - mem_start = loongson_memmap->map[i].mem_start; + + /* Memory size comes in MB if MEM_SIZE_IS_IN_BYTES not set */ + if (mem_size & MEM_SIZE_IS_IN_BYTES) + mem_size &= ~MEM_SIZE_IS_IN_BYTES; + else + mem_size = mem_size << 20; + + mem_start = (node_id << 44) | loongson_memmap->map[i].mem_start; switch (mem_type) { case SYSTEM_RAM_LOW: case SYSTEM_RAM_HIGH: - start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; - node_psize = (mem_size << 20) >> PAGE_SHIFT; - end_pfn = start_pfn + node_psize; - num_physpages += node_psize; - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), - PFN_PHYS(node_psize), node, + case UMA_VIDEO_RAM: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes usable\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_add_node(mem_start, mem_size, node, MEMBLOCK_NONE); break; case SYSTEM_RAM_RESERVED: - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); + case VIDEO_ROM: + case ADAPTER_ROM: + case ACPI_TABLE: + case SMBIOS_TABLE: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes reserved\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_reserve(mem_start, mem_size); + break; + /* We should not reserve VUMA_VIDEO_RAM as it overlaps with MMIO */ + case VUMA_VIDEO_RAM: + default: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes unhandled\n", + (u32)node_id, mem_type, &mem_start, &mem_size); break; } } + + /* Reserve vgabios if it comes from firmware */ + if (loongson_sysconf.vgabios_addr) + memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), + SZ_256K); } #ifndef CONFIG_NUMA diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index e420800043b0..e01c8d4a805a 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -53,7 +53,7 @@ static void loongson_halt(void) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* 0X80000000~0X80200000 is safe */ #define MAX_ARGS 64 @@ -158,7 +158,7 @@ static int __init mips_reboot_setup(void) _machine_halt = loongson_halt; pm_power_off = loongson_poweroff; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); if (WARN_ON(!kexec_argv)) return -ENOMEM; diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index e015a26a40f7..498bdc1bb0ed 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -864,7 +864,7 @@ const struct plat_smp_ops loongson3_smp_ops = { .cpu_disable = loongson3_cpu_disable, .cpu_die = loongson3_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index fd69dfa0cdab..d14ccc948a29 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -115,9 +115,12 @@ config ARCH_HAS_ILOG2_U64 default n config GENERIC_BUG - bool - default y + def_bool y depends on BUG + select GENERIC_BUG_RELATIVE_POINTERS if 64BIT + +config GENERIC_BUG_RELATIVE_POINTERS + bool config GENERIC_HWEIGHT bool @@ -140,11 +143,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN default 8 config ARCH_MMAP_RND_BITS_MAX - default 24 if 64BIT - default 17 + default 18 if 64BIT + default 13 config ARCH_MMAP_RND_COMPAT_BITS_MAX - default 17 + default 13 # unless you want to implement ACPI on PA-RISC ... ;-) config PM diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h index 1ed45fd085d3..1eb488f25b83 100644 --- a/arch/parisc/include/asm/alternative.h +++ b/arch/parisc/include/asm/alternative.h @@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* Alternative SMP implementation. */ #define ALTERNATIVE(cond, replacement) "!0:" \ - ".section .altinstructions, \"aw\" !" \ + ".section .altinstructions, \"a\" !" \ + ".align 4 !" \ ".word (0b-4-.) !" \ ".hword 1, " __stringify(cond) " !" \ ".word " __stringify(replacement) " !" \ @@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace one single instructions by a new instruction */ #define ALTERNATIVE(from, to, cond, replacement)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword (to - from)/4, cond ! \ .word replacement ! \ @@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace multiple instructions by new code */ #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword -num_instructions, cond ! \ .word (new_instr_ptr - .) ! \ diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 75677b526b2b..74d17d7e759d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -574,6 +574,7 @@ */ #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \ .section __ex_table,"aw" ! \ + .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ .previous diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 4b6d60b94124..833555f74ffa 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -17,24 +17,27 @@ #define PARISC_BUG_BREAK_ASM "break 0x1f, 0x1fff" #define PARISC_BUG_BREAK_INSN 0x03ffe01f /* PARISC_BUG_BREAK_ASM */ -#if defined(CONFIG_64BIT) -#define ASM_WORD_INSN ".dword\t" +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS +# define __BUG_REL(val) ".word " __stringify(val) " - ." #else -#define ASM_WORD_INSN ".word\t" +# define __BUG_REL(val) ".word " __stringify(val) #endif + #ifdef CONFIG_DEBUG_BUGVERBOSE #define BUG() \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)) ); \ unreachable(); \ } while(0) @@ -51,10 +54,12 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ @@ -65,10 +70,11 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ - "2:\t" ASM_WORD_INSN "1b\n" \ - "\t.short %c0\n" \ - "\t.org 2b+%c1\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t.short %0\n" \ + "\t.blockz %1-4-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ "i" (sizeof(struct bug_entry)) ); \ diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 140eaa97bf21..2d73d3c3cd37 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -349,15 +349,7 @@ struct pt_regs; /* forward declaration... */ #define ELF_HWCAP 0 -/* Masks for stack and mmap randomization */ -#define BRK_RND_MASK (is_32bit_task() ? 0x07ffUL : 0x3ffffUL) -#define MMAP_RND_MASK (is_32bit_task() ? 0x1fffUL : 0x3ffffUL) -#define STACK_RND_MASK MMAP_RND_MASK - -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *); -#define arch_randomize_brk arch_randomize_brk - +#define STACK_RND_MASK 0x7ff /* 8MB of VA */ #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index af2a598bc0f8..94428798b6aa 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -15,10 +15,12 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: @@ -30,10 +32,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool asm_volatile_goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index ee9e071859b2..47ebc4c91eaf 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -55,7 +55,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __section(".data..lock_aligned") +# define __lock_aligned __section(".data..lock_aligned") __aligned(16) #endif #endif /* __PARISC_LDCW_H */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index c05d121cf5d0..982aca20f56f 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -47,6 +47,8 @@ #ifndef __ASSEMBLY__ +struct rlimit; +unsigned long mmap_upper_limit(struct rlimit *rlim_stack); unsigned long calc_max_stack_size(unsigned long stack_max); /* diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 2bf660eabe42..4165079898d9 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -41,6 +41,7 @@ struct exception_table_entry { #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 29e2750f86a4..e95a977ba5f3 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -383,7 +383,7 @@ show_cpuinfo (struct seq_file *m, void *v) char cpu_name[60], *p; /* strip PA path from CPU name to not confuse lscpu */ - strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name)); + strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name)); p = strrchr(cpu_name, '['); if (p) *(--p) = 0; diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ab896eff7a1d..98af719d5f85 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max) * indicating that "current" should be used instead of a passed-in * value from the exec bprm as done with arch_pick_mmap_layout(). */ -static unsigned long mmap_upper_limit(struct rlimit *rlim_stack) +unsigned long mmap_upper_limit(struct rlimit *rlim_stack) { unsigned long stack_base; diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 58694d1989c2..548051b0b4af 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -130,6 +130,7 @@ SECTIONS RO_DATA(8) /* unwind info */ + . = ALIGN(4); .PARISC.unwind : { __start___unwind = .; *(.PARISC.unwind) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 8d3eacb50d56..9d44e6630908 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_DEBUG_CREDENTIALS=y # CONFIG_FTRACE is not set CONFIG_XMON=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6a9acfb690c9..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,15 @@ #include <asm/feature-fixups.h> #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 392404688cec..9452a54d356c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void) usermsr = current->thread.regs->msr; + /* Caller has enabled FP/VEC/VSX/TM in MSR */ if (usermsr & MSR_FP) - save_fpu(current); - + __giveup_fpu(current); if (usermsr & MSR_VEC) - save_altivec(current); + __giveup_altivec(current); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (usermsr & MSR_TM) { diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 90701885762c..40677416d7b2 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -62,7 +62,7 @@ .endif /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE + addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE PPC_STL r8, GPR1(r1) .if \allregs == 1 @@ -182,7 +182,7 @@ ftrace_no_trace: mflr r3 mtctr r3 REST_GPR(3, r1) - addi r1, r1, SWITCH_FRAME_SIZE + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE mtlr r0 bctr #endif diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 4094e4c4c77a..80b3f6e476b6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -33,6 +33,7 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + lvx v0, 0, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -109,6 +110,7 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + lvx v0,0,r7 blr #ifdef CONFIG_VSX diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index b33358ee6424..074263429faf 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -19,13 +19,11 @@ if VIRTUALIZATION config KVM bool - select PREEMPT_NOTIFIERS - select HAVE_KVM_EVENTFD + select KVM_COMMON select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_VFIO select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS - select INTERVAL_TREE config KVM_BOOK3S_HANDLER bool @@ -225,7 +223,6 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && PPC_E500 select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help @@ -238,7 +235,6 @@ config KVM_XICS bool "KVM in-kernel XICS emulation" depends on KVM_BOOK3S_64 && !KVM_MPIC select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD default y help Include support for the XICS (eXternal Interrupt Controller diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c39945a7fce3..23407fbd73c9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -528,7 +528,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: - case KVM_CAP_DEVICE_CTRL: case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_SET_GUEST_DEBUG: r = 1; @@ -578,7 +577,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; #endif -#ifdef CONFIG_HAVE_KVM_IRQFD +#ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQFD_RESAMPLE: r = !xive_enabled(); break; diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b1f25bac280b..71d52a670d95 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -385,11 +385,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); - if (migration_in_progress) + if (migration_in_progress) { rc = -EBUSY; - else + } else { rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); + if (!rc) + caps->nr_open_wins_progress++; + } + mutex_unlock(&vas_pseries_mutex); if (rc) goto out; @@ -404,8 +408,17 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, goto out_free; txwin->win_type = cop_feat_caps->win_type; - mutex_lock(&vas_pseries_mutex); + /* + * The migration SUSPEND thread sets migration_in_progress and + * closes all open windows from the list. But the window is + * added to the list after open and modify HCALLs. So possible + * that migration_in_progress is set before modify HCALL which + * may cause some windows are still open when the hypervisor + * initiates the migration. + * So checks the migration_in_progress flag again and close all + * open windows. + * * Possible to lose the acquired credit with DLPAR core * removal after the window is opened. So if there are any * closed windows (means with lost credits), do not give new @@ -413,9 +426,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * after the existing windows are reopened when credits are * available. */ - if (!caps->nr_close_wins) { + mutex_lock(&vas_pseries_mutex); + if (!caps->nr_close_wins && !migration_in_progress) { list_add(&txwin->win_list, &caps->list); caps->nr_open_windows++; + caps->nr_open_wins_progress--; mutex_unlock(&vas_pseries_mutex); vas_user_win_add_mm_context(&txwin->vas_win.task_ref); return &txwin->vas_win; @@ -433,6 +448,12 @@ out_free: */ free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); + /* + * Hold mutex and reduce nr_open_wins_progress counter. + */ + mutex_lock(&vas_pseries_mutex); + caps->nr_open_wins_progress--; + mutex_unlock(&vas_pseries_mutex); out: atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); @@ -937,14 +958,14 @@ int vas_migration_handler(int action) struct vas_caps *vcaps; int i, rc = 0; + pr_info("VAS migration event %d\n", action); + /* * NX-GZIP is not enabled. Nothing to do for migration. */ if (!copypaste_feat) return rc; - mutex_lock(&vas_pseries_mutex); - if (action == VAS_SUSPEND) migration_in_progress = true; else @@ -990,12 +1011,27 @@ int vas_migration_handler(int action) switch (action) { case VAS_SUSPEND: + mutex_lock(&vas_pseries_mutex); rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, true); + /* + * Windows are included in the list after successful + * open. So wait for closing these in-progress open + * windows in vas_allocate_window() which will be + * done if the migration_in_progress is set. + */ + while (vcaps->nr_open_wins_progress) { + mutex_unlock(&vas_pseries_mutex); + msleep(10); + mutex_lock(&vas_pseries_mutex); + } + mutex_unlock(&vas_pseries_mutex); break; case VAS_RESUME: + mutex_lock(&vas_pseries_mutex); atomic_set(&caps->nr_total_credits, new_nr_creds); rc = reconfig_open_windows(vcaps, new_nr_creds, true); + mutex_unlock(&vas_pseries_mutex); break; default: /* should not happen */ @@ -1011,8 +1047,9 @@ int vas_migration_handler(int action) goto out; } + pr_info("VAS migration event (%d) successful\n", action); + out: - mutex_unlock(&vas_pseries_mutex); return rc; } diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 7115043ec488..45567cd13178 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -91,6 +91,8 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_open_wins_progress; /* Number of open windows in */ + /* progress. Used in migration */ int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ int nr_open_windows; /* Number of successful open windows */ u8 feat; /* Feature type */ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95a2a06acc6a..ef8b7b012a0b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -685,7 +685,7 @@ config RISCV_BOOT_SPINWAIT If unsure what to do here, say N. config ARCH_SUPPORTS_KEXEC - def_bool MMU + def_bool y config ARCH_SELECTS_KEXEC def_bool y @@ -693,7 +693,7 @@ config ARCH_SELECTS_KEXEC select HOTPLUG_CPU if SMP config ARCH_SUPPORTS_KEXEC_FILE - def_bool 64BIT && MMU + def_bool 64BIT config ARCH_SELECTS_KEXEC_FILE def_bool y @@ -724,6 +724,25 @@ config COMPAT If you want to execute 32-bit userspace applications, say Y. +config PARAVIRT + bool "Enable paravirtualization code" + depends on RISCV_SBI + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + config RELOCATABLE bool "Build a relocatable kernel" depends on MMU && 64BIT && !XIP_KERNEL diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index 90b261114763..dce96f27cc89 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -8,9 +8,6 @@ #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/leds/common.h> -/* Clock frequency (in Hz) of the rtcclk */ -#define RTCCLK_FREQ 1000000 - / { model = "Microchip PolarFire-SoC Icicle Kit"; compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit", @@ -29,10 +26,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = <RTCCLK_FREQ>; - }; - leds { compatible = "gpio-leds"; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts index 184cb36a175e..a8d623ee9fa4 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts @@ -10,9 +10,6 @@ #include "mpfs.dtsi" #include "mpfs-m100pfs-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Aries Embedded M100PFEVPS"; compatible = "aries,m100pfsevp", "microchip,mpfs"; @@ -33,10 +30,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = <MTIMER_FREQ>; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x40000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index c87cc2d8fe29..ea0808ab1042 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -6,9 +6,6 @@ #include "mpfs.dtsi" #include "mpfs-polarberry-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Sundance PolarBerry"; compatible = "sundance,polarberry", "microchip,mpfs"; @@ -22,10 +19,6 @@ stdout-path = "serial0:115200n8"; }; - cpus { - timebase-frequency = <MTIMER_FREQ>; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x2e000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts index 013cb666c72d..f9a890579438 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts @@ -6,9 +6,6 @@ #include "mpfs.dtsi" #include "mpfs-sev-kit-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { #address-cells = <2>; #size-cells = <2>; @@ -28,10 +25,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = <MTIMER_FREQ>; - }; - reserved-memory { #address-cells = <2>; #size-cells = <2>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts index e0797c7e1b35..d1120f5f2c01 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts @@ -11,9 +11,6 @@ #include "mpfs.dtsi" #include "mpfs-tysom-m-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Aldec TySOM-M-MPFS250T-REV2"; compatible = "aldec,tysom-m-mpfs250t-rev2", "microchip,mpfs"; @@ -34,10 +31,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = <MTIMER_FREQ>; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x30000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index a6faf24f1dba..266489d43912 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -13,6 +13,7 @@ cpus { #address-cells = <1>; #size-cells = <0>; + timebase-frequency = <1000000>; cpu0: cpu@0 { compatible = "sifive,e51", "sifive,rocket0", "riscv"; diff --git a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi index df40e87ee063..aec6401a467b 100644 --- a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi @@ -34,7 +34,6 @@ cpu0_intc: interrupt-controller { compatible = "riscv,cpu-intc"; interrupt-controller; - #address-cells = <0>; #interrupt-cells = <1>; }; }; diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index 197db68cc8da..17a904869724 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -38,29 +38,35 @@ static long ax45mp_iocp_sw_workaround(void) return ret.error ? 0 : ret.value; } -static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) +static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) { + static bool done; + if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO)) - return false; + return; + + if (done) + return; + + done = true; if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID) - return false; + return; if (!ax45mp_iocp_sw_workaround()) - return false; + return; /* Set this just to make core cbo code happy */ riscv_cbom_block_size = 1; riscv_noncoherent_supported(); - - return true; } void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage) { - errata_probe_iocp(stage, archid, impid); + if (stage == RISCV_ALTERNATIVES_BOOT) + errata_probe_iocp(stage, archid, impid); /* we have nothing to patch here ATM so just return back */ } diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 6964dd235e97..484d04a92fa6 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -41,6 +41,7 @@ KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_HFENCE \ KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6) enum kvm_riscv_hfence_type { KVM_RISCV_HFENCE_UNKNOWN = 0, @@ -262,6 +263,12 @@ struct kvm_vcpu_arch { /* 'static' configurations which are set only once */ struct kvm_vcpu_config cfg; + + /* SBI steal-time accounting */ + struct { + gpa_t shmem; + u64 last_steal; + } sta; }; static inline void kvm_arch_sync_events(struct kvm *kvm) {} @@ -370,4 +377,7 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu); + #endif /* __RISCV_KVM_HOST_H__ */ diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 6a453f7f8b56..b96705258cf9 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -15,9 +15,10 @@ #define KVM_SBI_VERSION_MINOR 0 enum kvm_riscv_sbi_ext_status { - KVM_RISCV_SBI_EXT_UNINITIALIZED, - KVM_RISCV_SBI_EXT_AVAILABLE, - KVM_RISCV_SBI_EXT_UNAVAILABLE, + KVM_RISCV_SBI_EXT_STATUS_UNINITIALIZED, + KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE, + KVM_RISCV_SBI_EXT_STATUS_ENABLED, + KVM_RISCV_SBI_EXT_STATUS_DISABLED, }; struct kvm_vcpu_sbi_context { @@ -36,7 +37,7 @@ struct kvm_vcpu_sbi_extension { unsigned long extid_start; unsigned long extid_end; - bool default_unavail; + bool default_disabled; /** * SBI extension handler. It can be defined for a given extension or group of @@ -59,11 +60,21 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( struct kvm_vcpu *vcpu, unsigned long extid); +bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx); int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long *reg_val); +int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_val); + #ifdef CONFIG_RISCV_SBI_V01 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; #endif @@ -74,6 +85,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; diff --git a/arch/riscv/include/asm/paravirt.h b/arch/riscv/include/asm/paravirt.h new file mode 100644 index 000000000000..c0abde70fc2c --- /dev/null +++ b/arch/riscv/include/asm/paravirt.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_PARAVIRT_H +#define _ASM_RISCV_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +#include <linux/static_call_types.h> + +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +u64 dummy_steal_clock(int cpu); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return static_call(pv_steal_clock)(cpu); +} + +int __init pv_time_init(void); + +#else + +#define pv_time_init() do {} while (0) + +#endif /* CONFIG_PARAVIRT */ +#endif /* _ASM_RISCV_PARAVIRT_H */ diff --git a/arch/riscv/include/asm/paravirt_api_clock.h b/arch/riscv/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..65ac7cee0dad --- /dev/null +++ b/arch/riscv/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include <asm/paravirt.h> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 294044429e8e..ab00235b018f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -899,7 +899,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define TASK_SIZE 0xffffffffUL -#define VMALLOC_START 0 +#define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE #endif /* !CONFIG_MMU */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 0892f4421bc4..b6f898c56940 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -31,6 +31,7 @@ enum sbi_ext_id { SBI_EXT_SRST = 0x53525354, SBI_EXT_PMU = 0x504D55, SBI_EXT_DBCN = 0x4442434E, + SBI_EXT_STA = 0x535441, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -243,6 +244,22 @@ enum sbi_ext_dbcn_fid { SBI_EXT_DBCN_CONSOLE_WRITE_BYTE = 2, }; +/* SBI STA (steal-time accounting) extension */ +enum sbi_ext_sta_fid { + SBI_EXT_STA_STEAL_TIME_SET_SHMEM = 0, +}; + +struct sbi_sta_struct { + __le32 sequence; + __le32 flags; + __le64 steal; + u8 preempted; + u8 pad[47]; +} __packed; + +#define SBI_STA_SHMEM_DISABLE -1 + +/* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 #define SBI_SPEC_VERSION_MAJOR_MASK 0x7f diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index 1d7942c8a6cb..eeec04b7dae6 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -46,9 +46,6 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__riscv_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -82,6 +79,4 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); return sys_ni_syscall(); \ } -#define SYS_NI(name) SYSCALL_ALIAS(__riscv_sys_##name, sys_ni_posix_timers); - #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 60d3b21dead7..d6b7a5b95874 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -157,9 +157,16 @@ enum KVM_RISCV_SBI_EXT_ID { KVM_RISCV_SBI_EXT_EXPERIMENTAL, KVM_RISCV_SBI_EXT_VENDOR, KVM_RISCV_SBI_EXT_DBCN, + KVM_RISCV_SBI_EXT_STA, KVM_RISCV_SBI_EXT_MAX, }; +/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_sbi_sta { + unsigned long shmem_lo; + unsigned long shmem_hi; +}; + /* Possible states for kvm_riscv_timer */ #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 @@ -241,6 +248,12 @@ enum KVM_RISCV_SBI_EXT_ID { #define KVM_REG_RISCV_VECTOR_REG(n) \ ((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long)) +/* Registers for specific SBI extensions are mapped as type 10 */ +#define KVM_REG_RISCV_SBI_STATE (0x0a << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_STA_REG(name) \ + (offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long)) + /* Device Control API: RISC-V AIA */ #define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 #define KVM_DEV_RISCV_APLIC_SIZE 0x4000 diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fee22a3d1b53..807c2bde1f83 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_SMP) += sbi-ipi.o obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o +obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index 55f1d7856b54..8706736fd4e2 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -5,18 +5,20 @@ void arch_crash_save_vmcoreinfo(void) { - VMCOREINFO_NUMBER(VA_BITS); VMCOREINFO_NUMBER(phys_ram_base); vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); +#ifdef CONFIG_MMU + VMCOREINFO_NUMBER(VA_BITS); vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); #ifdef CONFIG_64BIT vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); #endif +#endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", kernel_map.va_kernel_pa_offset); diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index b77397432403..76ace1e0b46f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -154,7 +154,6 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a3 add a3, a3, a1 REG_L sp, (a3) - scs_load_current .Lsecondary_start_common: @@ -165,6 +164,7 @@ secondary_start_sbi: call relocate_enable_mmu #endif call .Lsetup_trap_vector + scs_load_current tail smp_callin #endif /* CONFIG_SMP */ diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 56a8c78e9e21..aac019ed63b1 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -40,15 +40,6 @@ struct relocation_handlers { long buffer); }; -unsigned int initialize_relocation_hashtable(unsigned int num_relocations); -void process_accumulated_relocations(struct module *me); -int add_relocation_to_accumulate(struct module *me, int type, void *location, - unsigned int hashtable_bits, Elf_Addr v); - -struct hlist_head *relocation_hashtable; - -struct list_head used_buckets_list; - /* * The auipc+jalr instruction pair can reach any PC-relative offset * in the range [-2^31 - 2^11, 2^31 - 2^11) @@ -64,7 +55,7 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) static int riscv_insn_rmw(void *location, u32 keep, u32 set) { - u16 *parcel = location; + __le16 *parcel = location; u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; insn &= keep; @@ -77,7 +68,7 @@ static int riscv_insn_rmw(void *location, u32 keep, u32 set) static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set) { - u16 *parcel = location; + __le16 *parcel = location; u16 insn = le16_to_cpu(*parcel); insn &= keep; @@ -604,7 +595,10 @@ static const struct relocation_handlers reloc_handlers[] = { /* 192-255 nonstandard ABI extensions */ }; -void process_accumulated_relocations(struct module *me) +static void +process_accumulated_relocations(struct module *me, + struct hlist_head **relocation_hashtable, + struct list_head *used_buckets_list) { /* * Only ADD/SUB/SET/ULEB128 should end up here. @@ -624,18 +618,25 @@ void process_accumulated_relocations(struct module *me) * - Each relocation entry for a location address */ struct used_bucket *bucket_iter; + struct used_bucket *bucket_iter_tmp; struct relocation_head *rel_head_iter; + struct hlist_node *rel_head_iter_tmp; struct relocation_entry *rel_entry_iter; + struct relocation_entry *rel_entry_iter_tmp; int curr_type; void *location; long buffer; - list_for_each_entry(bucket_iter, &used_buckets_list, head) { - hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) { + list_for_each_entry_safe(bucket_iter, bucket_iter_tmp, + used_buckets_list, head) { + hlist_for_each_entry_safe(rel_head_iter, rel_head_iter_tmp, + bucket_iter->bucket, node) { buffer = 0; location = rel_head_iter->location; - list_for_each_entry(rel_entry_iter, - rel_head_iter->rel_entry, head) { + list_for_each_entry_safe(rel_entry_iter, + rel_entry_iter_tmp, + rel_head_iter->rel_entry, + head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( me, &buffer, rel_entry_iter->value); @@ -648,11 +649,14 @@ void process_accumulated_relocations(struct module *me) kfree(bucket_iter); } - kfree(relocation_hashtable); + kfree(*relocation_hashtable); } -int add_relocation_to_accumulate(struct module *me, int type, void *location, - unsigned int hashtable_bits, Elf_Addr v) +static int add_relocation_to_accumulate(struct module *me, int type, + void *location, + unsigned int hashtable_bits, Elf_Addr v, + struct hlist_head *relocation_hashtable, + struct list_head *used_buckets_list) { struct relocation_entry *entry; struct relocation_head *rel_head; @@ -661,6 +665,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, unsigned long hash; entry = kmalloc(sizeof(*entry), GFP_KERNEL); + + if (!entry) + return -ENOMEM; + INIT_LIST_HEAD(&entry->head); entry->type = type; entry->value = v; @@ -669,7 +677,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, current_head = &relocation_hashtable[hash]; - /* Find matching location (if any) */ + /* + * Search for the relocation_head for the relocations that happen at the + * provided location + */ bool found = false; struct relocation_head *rel_head_iter; @@ -681,19 +692,45 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, } } + /* + * If there has not yet been any relocations at the provided location, + * create a relocation_head for that location and populate it with this + * relocation_entry. + */ if (!found) { rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL); + + if (!rel_head) { + kfree(entry); + return -ENOMEM; + } + rel_head->rel_entry = kmalloc(sizeof(struct list_head), GFP_KERNEL); + + if (!rel_head->rel_entry) { + kfree(entry); + kfree(rel_head); + return -ENOMEM; + } + INIT_LIST_HEAD(rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { bucket = kmalloc(sizeof(struct used_bucket), GFP_KERNEL); + + if (!bucket) { + kfree(entry); + kfree(rel_head); + kfree(rel_head->rel_entry); + return -ENOMEM; + } + INIT_LIST_HEAD(&bucket->head); bucket->bucket = current_head; - list_add(&bucket->head, &used_buckets_list); + list_add(&bucket->head, used_buckets_list); } hlist_add_head(&rel_head->node, current_head); } @@ -704,7 +741,9 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, return 0; } -unsigned int initialize_relocation_hashtable(unsigned int num_relocations) +static unsigned int +initialize_relocation_hashtable(unsigned int num_relocations, + struct hlist_head **relocation_hashtable) { /* Can safely assume that bits is not greater than sizeof(long) */ unsigned long hashtable_size = roundup_pow_of_two(num_relocations); @@ -720,12 +759,13 @@ unsigned int initialize_relocation_hashtable(unsigned int num_relocations) hashtable_size <<= should_double_size; - relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(*relocation_hashtable), - GFP_KERNEL); - __hash_init(relocation_hashtable, hashtable_size); + *relocation_hashtable = kmalloc_array(hashtable_size, + sizeof(*relocation_hashtable), + GFP_KERNEL); + if (!*relocation_hashtable) + return -ENOMEM; - INIT_LIST_HEAD(&used_buckets_list); + __hash_init(*relocation_hashtable, hashtable_size); return hashtable_bits; } @@ -742,7 +782,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Addr v; int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); - unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations); + struct hlist_head *relocation_hashtable; + struct list_head used_buckets_list; + unsigned int hashtable_bits; + + hashtable_bits = initialize_relocation_hashtable(num_relocations, + &relocation_hashtable); + + if (hashtable_bits < 0) + return hashtable_bits; + + INIT_LIST_HEAD(&used_buckets_list); pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -823,14 +873,18 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } if (reloc_handlers[type].accumulate_handler) - res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v); + res = add_relocation_to_accumulate(me, type, location, + hashtable_bits, v, + relocation_hashtable, + &used_buckets_list); else res = handler(me, location, v); if (res) return res; } - process_accumulated_relocations(me); + process_accumulated_relocations(me, &relocation_hashtable, + &used_buckets_list); return 0; } diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c new file mode 100644 index 000000000000..8e114f5930ce --- /dev/null +++ b/arch/riscv/kernel/paravirt.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. + */ + +#define pr_fmt(fmt) "riscv-pv: " fmt + +#include <linux/cpuhotplug.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/jump_label.h> +#include <linux/kconfig.h> +#include <linux/kernel.h> +#include <linux/percpu-defs.h> +#include <linux/printk.h> +#include <linux/static_call.h> +#include <linux/types.h> + +#include <asm/barrier.h> +#include <asm/page.h> +#include <asm/paravirt.h> +#include <asm/sbi.h> + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); + +DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64); + +static bool __init has_pv_steal_clock(void) +{ + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_STA) > 0) { + pr_info("SBI STA extension detected\n"); + return true; + } + + return false; +} + +static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi, + unsigned long flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM, + lo, hi, flags, 0, 0, 0); + if (ret.error) { + if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE) + pr_warn("Failed to disable steal-time shmem"); + else + pr_warn("Failed to set steal-time shmem"); + return sbi_err_map_linux_errno(ret.error); + } + + return 0; +} + +static int pv_time_cpu_online(unsigned int cpu) +{ + struct sbi_sta_struct *st = this_cpu_ptr(&steal_time); + phys_addr_t pa = __pa(st); + unsigned long lo = (unsigned long)pa; + unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0; + + return sbi_sta_steal_time_set_shmem(lo, hi, 0); +} + +static int pv_time_cpu_down_prepare(unsigned int cpu) +{ + return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE, + SBI_STA_SHMEM_DISABLE, 0); +} + +static u64 pv_time_steal_clock(int cpu) +{ + struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu); + u32 sequence; + u64 steal; + + /* + * Check the sequence field before and after reading the steal + * field. Repeat the read if it is different or odd. + */ + do { + sequence = READ_ONCE(st->sequence); + virt_rmb(); + steal = READ_ONCE(st->steal); + virt_rmb(); + } while ((le32_to_cpu(sequence) & 1) || + sequence != READ_ONCE(st->sequence)); + + return le64_to_cpu(steal); +} + +int __init pv_time_init(void) +{ + int ret; + + if (!has_pv_steal_clock()) + return 0; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "riscv/pv_time:online", + pv_time_cpu_online, + pv_time_cpu_down_prepare); + if (ret < 0) + return ret; + + static_call_update(pv_steal_clock, pv_time_steal_clock); + + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); + + pr_info("Computing paravirt steal-time\n"); + + return 0; +} diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index c712037dbe10..a2ca5b7756a5 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -169,7 +169,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, pair->value &= ~missing; } -static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) { struct riscv_hwprobe pair; diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S index 90f22049d553..8515ed7cd8c1 100644 --- a/arch/riscv/kernel/tests/module_test/test_uleb128.S +++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S @@ -6,13 +6,13 @@ .text .global test_uleb_basic test_uleb_basic: - ld a0, second + lw a0, second addi a0, a0, -127 ret .global test_uleb_large test_uleb_large: - ld a0, fourth + lw a0, fourth addi a0, a0, -0x07e8 ret @@ -22,10 +22,10 @@ first: second: .reloc second, R_RISCV_SET_ULEB128, second .reloc second, R_RISCV_SUB_ULEB128, first - .dword 0 + .word 0 third: .space 1000 fourth: .reloc fourth, R_RISCV_SET_ULEB128, fourth .reloc fourth, R_RISCV_SUB_ULEB128, third - .dword 0 + .word 0 diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 23641e82a9df..ba3477197789 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -12,6 +12,7 @@ #include <asm/sbi.h> #include <asm/processor.h> #include <asm/timex.h> +#include <asm/paravirt.h> unsigned long riscv_timebase __ro_after_init; EXPORT_SYMBOL_GPL(riscv_timebase); @@ -45,4 +46,6 @@ void __init time_init(void) timer_probe(); tick_setup_hrtimer_broadcast(); + + pv_time_init(); } diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 5eba37147caa..5255f8134aef 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -550,16 +550,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { len = 8; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) { len = 8; val.data_ulong = GET_RS2C(insn, regs); #endif } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { len = 4; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) { len = 4; val.data_ulong = GET_RS2C(insn, regs); } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) { diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index ae2e05f050ec..d490db943858 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -20,18 +20,17 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" depends on RISCV_SBI && MMU - select HAVE_KVM_EVENTFD select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI select HAVE_KVM_VCPU_ASYNC_IOCTL + select KVM_COMMON select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO select KVM_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER - select PREEMPT_NOTIFIERS + select SCHED_INFO help Support hosting virtualized guest machines. diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 4c2067fc59fc..c9646521f113 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -26,6 +26,7 @@ kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o kvm-y += vcpu_sbi_base.o kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_hsm.o +kvm-y += vcpu_sbi_sta.o kvm-y += vcpu_timer.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o kvm-y += aia.o diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 6cf23b8adb71..e808723a85f1 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -55,6 +55,7 @@ struct imsic { /* IMSIC SW-file */ struct imsic_mrif *swfile; phys_addr_t swfile_pa; + spinlock_t swfile_extirq_lock; }; #define imsic_vs_csr_read(__c) \ @@ -613,12 +614,23 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu) { struct imsic *imsic = vcpu->arch.aia_context.imsic_state; struct imsic_mrif *mrif = imsic->swfile; + unsigned long flags; + + /* + * The critical section is necessary during external interrupt + * updates to avoid the risk of losing interrupts due to potential + * interruptions between reading topei and updating pending status. + */ + + spin_lock_irqsave(&imsic->swfile_extirq_lock, flags); if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) && imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis)) kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT); else kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT); + + spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags); } static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear, @@ -1039,6 +1051,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) } imsic->swfile = page_to_virt(swfile_page); imsic->swfile_pa = page_to_phys(swfile_page); + spin_lock_init(&imsic->swfile_extirq_lock); /* Setup IO device */ kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e087c809073c..b5ca9f2e98ac 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -83,6 +83,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.hfence_tail = 0; memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue)); + kvm_riscv_vcpu_sbi_sta_reset(vcpu); + /* Reset the guest CSRs for hotplug usecase */ if (loaded) kvm_arch_vcpu_load(vcpu, smp_processor_id()); @@ -541,6 +543,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_riscv_vcpu_aia_load(vcpu, cpu); + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + vcpu->cpu = cpu; } @@ -614,6 +618,9 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_HFENCE, vcpu)) kvm_riscv_hfence_process(vcpu); + + if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) + kvm_riscv_vcpu_record_steal_time(vcpu); } } @@ -757,8 +764,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Update HVIP CSR for current CPU */ kvm_riscv_update_hvip(vcpu); - if (ret <= 0 || - kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || + if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { vcpu->mode = OUTSIDE_GUEST_MODE; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f8c9fa0c03c5..fc34557f5356 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -485,7 +485,7 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num, reg_val); -break; + break; default: rc = -ENOENT; break; @@ -931,50 +931,106 @@ static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) return copy_isa_ext_reg_indices(vcpu, NULL);; } -static inline unsigned long num_sbi_ext_regs(void) +static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - /* - * number of KVM_REG_RISCV_SBI_SINGLE + - * 2 x (number of KVM_REG_RISCV_SBI_MULTI) - */ - return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1); -} - -static int copy_sbi_ext_reg_indices(u64 __user *uindices) -{ - int n; + unsigned int n = 0; - /* copy KVM_REG_RISCV_SBI_SINGLE */ - n = KVM_RISCV_SBI_EXT_MAX; - for (int i = 0; i < n; i++) { + for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { u64 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | i; + if (!riscv_vcpu_supports_sbi_ext(vcpu, i)) + continue; + if (uindices) { if (put_user(reg, uindices)) return -EFAULT; uindices++; } + + n++; } - /* copy KVM_REG_RISCV_SBI_MULTI */ - n = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1; - for (int i = 0; i < n; i++) { - u64 size = IS_ENABLED(CONFIG_32BIT) ? - KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; - u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | - KVM_REG_RISCV_SBI_MULTI_EN | i; + return n; +} + +static unsigned long num_sbi_ext_regs(struct kvm_vcpu *vcpu) +{ + return copy_sbi_ext_reg_indices(vcpu, NULL); +} + +static int copy_sbi_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + int total = 0; + + if (scontext->ext_status[KVM_RISCV_SBI_EXT_STA] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + int n = sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long); + + for (int i = 0; i < n; i++) { + u64 reg = KVM_REG_RISCV | size | + KVM_REG_RISCV_SBI_STATE | + KVM_REG_RISCV_SBI_STA | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + total += n; + } + + return total; +} + +static inline unsigned long num_sbi_regs(struct kvm_vcpu *vcpu) +{ + return copy_sbi_reg_indices(vcpu, NULL); +} + +static inline unsigned long num_vector_regs(const struct kvm_vcpu *vcpu) +{ + if (!riscv_isa_extension_available(vcpu->arch.isa, v)) + return 0; + + /* vstart, vl, vtype, vcsr, vlenb and 32 vector regs */ + return 37; +} + +static int copy_vector_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + int n = num_vector_regs(vcpu); + u64 reg, size; + int i; + + if (n == 0) + return 0; + + /* copy vstart, vl, vtype, vcsr and vlenb */ + size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + for (i = 0; i < 5; i++) { + reg = KVM_REG_RISCV | size | KVM_REG_RISCV_VECTOR | i; if (uindices) { if (put_user(reg, uindices)) return -EFAULT; uindices++; } + } - reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | - KVM_REG_RISCV_SBI_MULTI_DIS | i; + /* vector_regs have a variable 'vlenb' size */ + size = __builtin_ctzl(cntx->vector.vlenb); + size <<= KVM_REG_SIZE_SHIFT; + for (i = 0; i < 32; i++) { + reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | + KVM_REG_RISCV_VECTOR_REG(i); if (uindices) { if (put_user(reg, uindices)) @@ -983,7 +1039,7 @@ static int copy_sbi_ext_reg_indices(u64 __user *uindices) } } - return num_sbi_ext_regs(); + return n; } /* @@ -1001,8 +1057,10 @@ unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu) res += num_timer_regs(); res += num_fp_f_regs(vcpu); res += num_fp_d_regs(vcpu); + res += num_vector_regs(vcpu); res += num_isa_ext_regs(vcpu); - res += num_sbi_ext_regs(); + res += num_sbi_ext_regs(vcpu); + res += num_sbi_regs(vcpu); return res; } @@ -1045,14 +1103,25 @@ int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu, return ret; uindices += ret; + ret = copy_vector_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; + ret = copy_isa_ext_reg_indices(vcpu, uindices); if (ret < 0) return ret; uindices += ret; - ret = copy_sbi_ext_reg_indices(uindices); + ret = copy_sbi_ext_reg_indices(vcpu, uindices); if (ret < 0) return ret; + uindices += ret; + + ret = copy_sbi_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; return 0; } @@ -1075,12 +1144,14 @@ int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_FP_D: return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_D); + case KVM_REG_RISCV_VECTOR: + return kvm_riscv_vcpu_set_reg_vector(vcpu, reg); case KVM_REG_RISCV_ISA_EXT: return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); case KVM_REG_RISCV_SBI_EXT: return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg); - case KVM_REG_RISCV_VECTOR: - return kvm_riscv_vcpu_set_reg_vector(vcpu, reg); + case KVM_REG_RISCV_SBI_STATE: + return kvm_riscv_vcpu_set_reg_sbi(vcpu, reg); default: break; } @@ -1106,12 +1177,14 @@ int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_FP_D: return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_D); + case KVM_REG_RISCV_VECTOR: + return kvm_riscv_vcpu_get_reg_vector(vcpu, reg); case KVM_REG_RISCV_ISA_EXT: return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); case KVM_REG_RISCV_SBI_EXT: return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg); - case KVM_REG_RISCV_VECTOR: - return kvm_riscv_vcpu_get_reg_vector(vcpu, reg); + case KVM_REG_RISCV_SBI_STATE: + return kvm_riscv_vcpu_get_reg_sbi(vcpu, reg); default: break; } diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index a04ff98085d9..72a2ffb8dcd1 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -71,6 +71,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { .ext_ptr = &vcpu_sbi_ext_dbcn, }, { + .ext_idx = KVM_RISCV_SBI_EXT_STA, + .ext_ptr = &vcpu_sbi_ext_sta, + }, + { .ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL, .ext_ptr = &vcpu_sbi_ext_experimental, }, @@ -80,6 +84,34 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { }, }; +static const struct kvm_riscv_sbi_extension_entry * +riscv_vcpu_get_sbi_ext(struct kvm_vcpu *vcpu, unsigned long idx) +{ + const struct kvm_riscv_sbi_extension_entry *sext = NULL; + + if (idx >= KVM_RISCV_SBI_EXT_MAX) + return NULL; + + for (int i = 0; i < ARRAY_SIZE(sbi_ext); i++) { + if (sbi_ext[i].ext_idx == idx) { + sext = &sbi_ext[i]; + break; + } + } + + return sext; +} + +bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *sext; + + sext = riscv_vcpu_get_sbi_ext(vcpu, idx); + + return sext && scontext->ext_status[sext->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; +} + void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct kvm_cpu_context *cp = &vcpu->arch.guest_context; @@ -140,28 +172,19 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu, unsigned long reg_num, unsigned long reg_val) { - unsigned long i; - const struct kvm_riscv_sbi_extension_entry *sext = NULL; struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; - - if (reg_num >= KVM_RISCV_SBI_EXT_MAX) - return -ENOENT; + const struct kvm_riscv_sbi_extension_entry *sext; if (reg_val != 1 && reg_val != 0) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { - if (sbi_ext[i].ext_idx == reg_num) { - sext = &sbi_ext[i]; - break; - } - } - if (!sext) + sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num); + if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE) return -ENOENT; scontext->ext_status[sext->ext_idx] = (reg_val) ? - KVM_RISCV_SBI_EXT_AVAILABLE : - KVM_RISCV_SBI_EXT_UNAVAILABLE; + KVM_RISCV_SBI_EXT_STATUS_ENABLED : + KVM_RISCV_SBI_EXT_STATUS_DISABLED; return 0; } @@ -170,24 +193,16 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu, unsigned long reg_num, unsigned long *reg_val) { - unsigned long i; - const struct kvm_riscv_sbi_extension_entry *sext = NULL; struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *sext; - if (reg_num >= KVM_RISCV_SBI_EXT_MAX) - return -ENOENT; - - for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { - if (sbi_ext[i].ext_idx == reg_num) { - sext = &sbi_ext[i]; - break; - } - } - if (!sext) + sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num); + if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE) return -ENOENT; *reg_val = scontext->ext_status[sext->ext_idx] == - KVM_RISCV_SBI_EXT_AVAILABLE; + KVM_RISCV_SBI_EXT_STATUS_ENABLED; + return 0; } @@ -310,6 +325,69 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, return 0; } +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_SBI_STATE); + unsigned long reg_subtype, reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_STA: + return kvm_riscv_vcpu_set_reg_sbi_sta(vcpu, reg_num, reg_val); + default: + return -EINVAL; + } + + return 0; +} + +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_SBI_STATE); + unsigned long reg_subtype, reg_val; + int ret; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_STA: + ret = kvm_riscv_vcpu_get_reg_sbi_sta(vcpu, reg_num, ®_val); + break; + default: + return -EINVAL; + } + + if (ret) + return ret; + + if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( struct kvm_vcpu *vcpu, unsigned long extid) { @@ -325,7 +403,7 @@ const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( if (ext->extid_start <= extid && ext->extid_end >= extid) { if (entry->ext_idx >= KVM_RISCV_SBI_EXT_MAX || scontext->ext_status[entry->ext_idx] == - KVM_RISCV_SBI_EXT_AVAILABLE) + KVM_RISCV_SBI_EXT_STATUS_ENABLED) return ext; return NULL; @@ -413,12 +491,12 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu) if (ext->probe && !ext->probe(vcpu)) { scontext->ext_status[entry->ext_idx] = - KVM_RISCV_SBI_EXT_UNAVAILABLE; + KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; continue; } - scontext->ext_status[entry->ext_idx] = ext->default_unavail ? - KVM_RISCV_SBI_EXT_UNAVAILABLE : - KVM_RISCV_SBI_EXT_AVAILABLE; + scontext->ext_status[entry->ext_idx] = ext->default_disabled ? + KVM_RISCV_SBI_EXT_STATUS_DISABLED : + KVM_RISCV_SBI_EXT_STATUS_ENABLED; } } diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 23b57c931b15..9c2ab3dfa93a 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -204,6 +204,6 @@ static int kvm_sbi_ext_dbcn_handler(struct kvm_vcpu *vcpu, const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = { .extid_start = SBI_EXT_DBCN, .extid_end = SBI_EXT_DBCN, - .default_unavail = true, + .default_disabled = true, .handler = kvm_sbi_ext_dbcn_handler, }; diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c new file mode 100644 index 000000000000..01f09fe8c3b0 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. + */ + +#include <linux/kconfig.h> +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/mm.h> +#include <linux/sizes.h> + +#include <asm/bug.h> +#include <asm/current.h> +#include <asm/kvm_vcpu_sbi.h> +#include <asm/page.h> +#include <asm/sbi.h> +#include <asm/uaccess.h> + +void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu) +{ + vcpu->arch.sta.shmem = INVALID_GPA; + vcpu->arch.sta.last_steal = 0; +} + +void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu) +{ + gpa_t shmem = vcpu->arch.sta.shmem; + u64 last_steal = vcpu->arch.sta.last_steal; + u32 *sequence_ptr, sequence; + u64 *steal_ptr, steal; + unsigned long hva; + gfn_t gfn; + + if (shmem == INVALID_GPA) + return; + + /* + * shmem is 64-byte aligned (see the enforcement in + * kvm_sbi_sta_steal_time_set_shmem()) and the size of sbi_sta_struct + * is 64 bytes, so we know all its offsets are in the same page. + */ + gfn = shmem >> PAGE_SHIFT; + hva = kvm_vcpu_gfn_to_hva(vcpu, gfn); + + if (WARN_ON(kvm_is_error_hva(hva))) { + vcpu->arch.sta.shmem = INVALID_GPA; + return; + } + + sequence_ptr = (u32 *)(hva + offset_in_page(shmem) + + offsetof(struct sbi_sta_struct, sequence)); + steal_ptr = (u64 *)(hva + offset_in_page(shmem) + + offsetof(struct sbi_sta_struct, steal)); + + if (WARN_ON(get_user(sequence, sequence_ptr))) + return; + + sequence = le32_to_cpu(sequence); + sequence += 1; + + if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr))) + return; + + if (!WARN_ON(get_user(steal, steal_ptr))) { + steal = le64_to_cpu(steal); + vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay); + steal += vcpu->arch.sta.last_steal - last_steal; + WARN_ON(put_user(cpu_to_le64(steal), steal_ptr)); + } + + sequence += 1; + WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr)); + + kvm_vcpu_mark_page_dirty(vcpu, gfn); +} + +static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long shmem_phys_lo = cp->a0; + unsigned long shmem_phys_hi = cp->a1; + u32 flags = cp->a2; + struct sbi_sta_struct zero_sta = {0}; + unsigned long hva; + bool writable; + gpa_t shmem; + int ret; + + if (flags != 0) + return SBI_ERR_INVALID_PARAM; + + if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE && + shmem_phys_hi == SBI_STA_SHMEM_DISABLE) { + vcpu->arch.sta.shmem = INVALID_GPA; + return 0; + } + + if (shmem_phys_lo & (SZ_64 - 1)) + return SBI_ERR_INVALID_PARAM; + + shmem = shmem_phys_lo; + + if (shmem_phys_hi != 0) { + if (IS_ENABLED(CONFIG_32BIT)) + shmem |= ((gpa_t)shmem_phys_hi << 32); + else + return SBI_ERR_INVALID_ADDRESS; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable); + if (kvm_is_error_hva(hva) || !writable) + return SBI_ERR_INVALID_ADDRESS; + + ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta)); + if (ret) + return SBI_ERR_FAILURE; + + vcpu->arch.sta.shmem = shmem; + vcpu->arch.sta.last_steal = current->sched_info.run_delay; + + return 0; +} + +static int kvm_sbi_ext_sta_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + int ret; + + switch (funcid) { + case SBI_EXT_STA_STEAL_TIME_SET_SHMEM: + ret = kvm_sbi_sta_steal_time_set_shmem(vcpu); + break; + default: + ret = SBI_ERR_NOT_SUPPORTED; + break; + } + + retdata->err_val = ret; + + return 0; +} + +static unsigned long kvm_sbi_ext_sta_probe(struct kvm_vcpu *vcpu) +{ + return !!sched_info_on(); +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = { + .extid_start = SBI_EXT_STA, + .extid_end = SBI_EXT_STA, + .handler = kvm_sbi_ext_sta_handler, + .probe = kvm_sbi_ext_sta_probe, +}; + +int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *reg_val) +{ + switch (reg_num) { + case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): + *reg_val = (unsigned long)vcpu->arch.sta.shmem; + break; + case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): + if (IS_ENABLED(CONFIG_32BIT)) + *reg_val = upper_32_bits(vcpu->arch.sta.shmem); + else + *reg_val = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + +int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long reg_val) +{ + switch (reg_num) { + case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): + if (IS_ENABLED(CONFIG_32BIT)) { + gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem); + + vcpu->arch.sta.shmem = reg_val; + vcpu->arch.sta.shmem |= hi << 32; + } else { + vcpu->arch.sta.shmem = reg_val; + } + break; + case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): + if (IS_ENABLED(CONFIG_32BIT)) { + gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem); + + vcpu->arch.sta.shmem = ((gpa_t)reg_val << 32); + vcpu->arch.sta.shmem |= lo; + } else if (reg_val != 0) { + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index d74df8eb4d71..0c26189aa01c 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -15,7 +15,7 @@ .altmacro .option norelax -ENTRY(__kvm_riscv_switch_to) +SYM_FUNC_START(__kvm_riscv_switch_to) /* Save Host GPRs (except A0 and T0-T6) */ REG_S ra, (KVM_ARCH_HOST_RA)(a0) REG_S sp, (KVM_ARCH_HOST_SP)(a0) @@ -45,7 +45,7 @@ ENTRY(__kvm_riscv_switch_to) REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0) REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - la t4, __kvm_switch_return + la t4, .Lkvm_switch_return REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0) /* Save Host and Restore Guest SSTATUS */ @@ -113,7 +113,7 @@ ENTRY(__kvm_riscv_switch_to) /* Back to Host */ .align 2 -__kvm_switch_return: +.Lkvm_switch_return: /* Swap Guest A0 with SSCRATCH */ csrrw a0, CSR_SSCRATCH, a0 @@ -208,9 +208,9 @@ __kvm_switch_return: /* Return to C code */ ret -ENDPROC(__kvm_riscv_switch_to) +SYM_FUNC_END(__kvm_riscv_switch_to) -ENTRY(__kvm_riscv_unpriv_trap) +SYM_CODE_START(__kvm_riscv_unpriv_trap) /* * We assume that faulting unpriv load/store instruction is * 4-byte long and blindly increment SEPC by 4. @@ -231,12 +231,10 @@ ENTRY(__kvm_riscv_unpriv_trap) csrr a1, CSR_HTINST REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0) sret -ENDPROC(__kvm_riscv_unpriv_trap) +SYM_CODE_END(__kvm_riscv_unpriv_trap) #ifdef CONFIG_FPU - .align 3 - .global __kvm_riscv_fp_f_save -__kvm_riscv_fp_f_save: +SYM_FUNC_START(__kvm_riscv_fp_f_save) csrr t2, CSR_SSTATUS li t1, SR_FS csrs CSR_SSTATUS, t1 @@ -276,10 +274,9 @@ __kvm_riscv_fp_f_save: sw t0, KVM_ARCH_FP_F_FCSR(a0) csrw CSR_SSTATUS, t2 ret +SYM_FUNC_END(__kvm_riscv_fp_f_save) - .align 3 - .global __kvm_riscv_fp_d_save -__kvm_riscv_fp_d_save: +SYM_FUNC_START(__kvm_riscv_fp_d_save) csrr t2, CSR_SSTATUS li t1, SR_FS csrs CSR_SSTATUS, t1 @@ -319,10 +316,9 @@ __kvm_riscv_fp_d_save: sw t0, KVM_ARCH_FP_D_FCSR(a0) csrw CSR_SSTATUS, t2 ret +SYM_FUNC_END(__kvm_riscv_fp_d_save) - .align 3 - .global __kvm_riscv_fp_f_restore -__kvm_riscv_fp_f_restore: +SYM_FUNC_START(__kvm_riscv_fp_f_restore) csrr t2, CSR_SSTATUS li t1, SR_FS lw t0, KVM_ARCH_FP_F_FCSR(a0) @@ -362,10 +358,9 @@ __kvm_riscv_fp_f_restore: fscsr t0 csrw CSR_SSTATUS, t2 ret +SYM_FUNC_END(__kvm_riscv_fp_f_restore) - .align 3 - .global __kvm_riscv_fp_d_restore -__kvm_riscv_fp_d_restore: +SYM_FUNC_START(__kvm_riscv_fp_d_restore) csrr t2, CSR_SSTATUS li t1, SR_FS lw t0, KVM_ARCH_FP_D_FCSR(a0) @@ -405,4 +400,5 @@ __kvm_riscv_fp_d_restore: fscsr t0 csrw CSR_SSTATUS, t2 ret +SYM_FUNC_END(__kvm_riscv_fp_d_restore) #endif diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index b339a2682f25..d92d1348045c 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -76,6 +76,7 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL); if (!cntx->vector.datap) return -ENOMEM; + cntx->vector.vlenb = riscv_v_vsize / 32; vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); if (!vcpu->arch.host_context.vector.datap) @@ -115,6 +116,9 @@ static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): *reg_addr = &cntx->vector.vcsr; break; + case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb): + *reg_addr = &cntx->vector.vlenb; + break; case KVM_REG_RISCV_VECTOR_CSR_REG(datap): default: return -ENOENT; @@ -173,6 +177,18 @@ int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, if (!riscv_isa_extension_available(isa, v)) return -ENOENT; + if (reg_num == KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)) { + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + unsigned long reg_val; + + if (copy_from_user(®_val, uaddr, reg_size)) + return -EFAULT; + if (reg_val != cntx->vector.vlenb) + return -EINVAL; + + return 0; + } + rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, ®_addr); if (rc) return rc; diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 7e2b50c692c1..ce58bc48e5b8 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -179,7 +179,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = kvm_riscv_aia_available(); break; case KVM_CAP_IOEVENTFD: - case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 438cd92e6080..6de44ede4e14 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -44,8 +44,7 @@ CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -76,7 +75,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -93,6 +91,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLUB_STATS=y # CONFIG_COMPAT_BRK is not set @@ -619,6 +618,9 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_BTRFS_DEBUG=y CONFIG_BTRFS_ASSERT=y CONFIG_NILFS2_FS=m +CONFIG_BCACHEFS_FS=y +CONFIG_BCACHEFS_QUOTA=y +CONFIG_BCACHEFS_POSIX_ACL=y CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y @@ -691,7 +693,6 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y @@ -834,7 +835,6 @@ CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 1b8150e50f6a..bcae47da6b7c 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -42,8 +42,7 @@ CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -71,7 +70,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -88,6 +86,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y # CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y @@ -605,6 +604,9 @@ CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m +CONFIG_BCACHEFS_FS=m +CONFIG_BCACHEFS_QUOTA=y +CONFIG_BCACHEFS_POSIX_ACL=y CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y @@ -677,7 +679,6 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index b831083b4edd..47028450eee1 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -9,8 +9,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_CRASH_DUMP=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 94b6919026df..796007125dff 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -111,4 +111,10 @@ static inline void stfle(u64 *stfle_fac_list, int size) preempt_enable(); } +/** + * stfle_size - Actual size of the facility list as specified by stfle + * (number of double words) + */ +unsigned int stfle_size(void); + #endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index b714ed0ef688..9acf48e53a87 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -79,7 +79,7 @@ static inline int test_fp_ctl(u32 fpc) #define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) #define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH) -#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7) +#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW) struct kernel_fpu; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 67a298b6cf6e..52664105a473 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -818,7 +818,7 @@ struct s390_io_adapter { struct kvm_s390_cpu_model { /* facility mask supported by kvm & hosting machine */ - __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; + __u64 fac_mask[S390_ARCH_FAC_MASK_SIZE_U64]; struct kvm_s390_vm_cpu_subfunc subfuncs; /* facility list requested by guest (in dma page) */ __u64 *fac_list; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index dc17896a001a..c15eadbb9983 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -228,7 +228,6 @@ typedef struct thread_struct thread_struct; execve_tail(); \ } while (0) -/* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; struct seq_file; diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 9286430fe729..35c1d1b860d8 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -63,10 +63,6 @@ cond_syscall(__s390x_sys_##name); \ cond_syscall(__s390_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers) - #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ long __s390_compat_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ @@ -85,15 +81,11 @@ /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ cond_syscall(__s390_compat_sys_##name) -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers) - #define __S390_SYS_STUBx(x, name, ...) \ long __s390_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ @@ -124,9 +116,6 @@ #define COND_SYSCALL(name) \ cond_syscall(__s390x_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers) - #define __S390_SYS_STUBx(x, fullname, name, ...) #endif /* CONFIG_COMPAT */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 353def93973b..7a562b4199c8 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -41,7 +41,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o extra-y += vmlinux.lds diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c new file mode 100644 index 000000000000..f02127219a27 --- /dev/null +++ b/arch/s390/kernel/facility.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2023 + */ + +#include <asm/facility.h> + +unsigned int stfle_size(void) +{ + static unsigned int size; + unsigned int r; + u64 dummy; + + r = READ_ONCE(size); + if (!r) { + r = __stfle_asm(&dummy, 1) + 1; + WRITE_ONCE(size, r); + } + return r; +} +EXPORT_SYMBOL(stfle_size); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index cc364fce6aa9..ba75f6bee774 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -666,6 +666,7 @@ static int __init ipl_init(void) &ipl_ccw_attr_group_lpar); break; case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group); break; case IPL_TYPE_FCP: diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 77fd24e6cbb6..39a91b00438a 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -279,12 +279,6 @@ static int paicrypt_event_init(struct perf_event *event) if (IS_ERR(cpump)) return PTR_ERR(cpump); - /* Event initialization sets last_tag to 0. When later on the events - * are deleted and re-added, do not reset the event count value to zero. - * Events are added, deleted and re-added when 2 or more events - * are active at the same time. - */ - event->hw.last_tag = 0; event->destroy = paicrypt_event_destroy; if (a->sample_period) { @@ -318,6 +312,11 @@ static void paicrypt_start(struct perf_event *event, int flags) { u64 sum; + /* Event initialization sets last_tag to 0. When later on the events + * are deleted and re-added, do not reset the event count value to zero. + * Events are added, deleted and re-added when 2 or more events + * are active at the same time. + */ if (!event->hw.last_tag) { event->hw.last_tag = 1; sum = paicrypt_getall(event); /* Get current value */ diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 8ba0f1a3a39d..e7013a2e8960 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -260,7 +260,6 @@ static int paiext_event_init(struct perf_event *event) rc = paiext_alloc(a, event); if (rc) return rc; - event->hw.last_tag = 0; event->destroy = paiext_event_destroy; if (a->sample_period) { diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 45fdf2a9b2e3..72e9b7dcdf7d 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -20,19 +20,16 @@ config KVM def_tristate y prompt "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM - select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_VCPU_ASYNC_IOCTL - select HAVE_KVM_EVENTFD select KVM_ASYNC_PF select KVM_ASYNC_PF_SYNC + select KVM_COMMON select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_INVALID_WAKEUPS select HAVE_KVM_NO_POLL select KVM_VFIO - select INTERVAL_TREE select MMU_NOTIFIER help Support hosting paravirtualized guest machines using the SIE diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 3765c4223bf9..80879fc73c90 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -213,8 +213,8 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT) return -EINVAL; - bp_data = memdup_user(dbg->arch.hw_bp, - sizeof(*bp_data) * dbg->arch.nr_hw_bp); + bp_data = memdup_array_user(dbg->arch.hw_bp, dbg->arch.nr_hw_bp, + sizeof(*bp_data)); if (IS_ERR(bp_data)) return PTR_ERR(bp_data); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7aa0e668488f..39463d0e4a1c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -563,7 +563,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_IOEVENTFD: - case KVM_CAP_DEVICE_CTRL: case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 02dcbe82a8e5..fef42e2a80a2 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -19,6 +19,7 @@ #include <asm/nmi.h> #include <asm/dis.h> #include <asm/fpu/api.h> +#include <asm/facility.h> #include "kvm-s390.h" #include "gaccess.h" @@ -587,10 +588,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, if (!gmap_is_shadow(gmap)) return; - if (start >= 1UL << 31) - /* We are only interested in prefix pages */ - return; - /* * Only new shadow blocks are added to the list during runtime, * therefore we can safely reference them all the time. @@ -988,12 +985,26 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page) static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; - __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U; + __u32 fac = READ_ONCE(vsie_page->scb_o->fac); + /* + * Alternate-STFLE-Interpretive-Execution facilities are not supported + * -> format-0 flcb + */ if (fac && test_kvm_facility(vcpu->kvm, 7)) { retry_vsie_icpt(vsie_page); + /* + * The facility list origin (FLO) is in bits 1 - 28 of the FLD + * so we need to mask here before reading. + */ + fac = fac & 0x7ffffff8U; + /* + * format-0 -> size of nested guest's facility list == guest's size + * guest's size == host's size, since STFLE is interpretatively executed + * using a format-0 for the guest, too. + */ if (read_guest_real(vcpu, fac, &vsie_page->fac, - sizeof(vsie_page->fac))) + stfle_size() * sizeof(u64))) return set_validity_icpt(scb_s, 0x1090U); scb_s->fac = (__u32)(__u64) &vsie_page->fac; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3bd2ab2a9a34..5cb92941540b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -756,7 +756,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } diff --git a/arch/sh/include/asm/kexec.h b/arch/sh/include/asm/kexec.h index 927d80ba2332..76631714673c 100644 --- a/arch/sh/include/asm/kexec.h +++ b/arch/sh/include/asm/kexec.h @@ -28,7 +28,7 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_SH -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* arch/sh/kernel/machine_kexec.c */ void reserve_crashkernel(void); @@ -67,6 +67,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } #else static inline void reserve_crashkernel(void) { } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* __ASM_SH_KEXEC_H */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 69cd9ac4b2ab..2d7e70537de0 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_IO_TRAPPED) += io_trapped.o diff --git a/arch/sh/kernel/reboot.c b/arch/sh/kernel/reboot.c index e8eeedc9b182..1de006b1c339 100644 --- a/arch/sh/kernel/reboot.c +++ b/arch/sh/kernel/reboot.c @@ -63,7 +63,7 @@ struct machine_ops machine_ops = { .shutdown = native_machine_shutdown, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -88,7 +88,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 3d80515298d2..d3175f09b3aa 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -220,7 +220,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, request_resource(res, &code_resource); request_resource(res, &data_resource); request_resource(res, &bss_resource); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE request_resource(res, &crashk_res); #endif diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 55c98fdd67d2..18d15d1ce87d 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -178,7 +178,7 @@ static unsigned long get_cmdline_acpi_rsdp(void) { unsigned long addr = 0; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE char val[MAX_ADDR_LEN] = { }; int ret; diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 1b5d17a9f70d..cf1f13c82175 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -10,6 +10,7 @@ #include <asm/coco.h> #include <asm/tdx.h> #include <asm/vmx.h> +#include <asm/ia32.h> #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/pgtable.h> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index d813160b14d8..6356060caaf3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include <xen/events.h> #endif +#include <asm/apic.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/vdso.h> @@ -167,7 +168,96 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } } -/* Handles int $0x80 */ +#ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + +/** + * int80_emulation - 32-bit legacy syscall entry + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +DEFINE_IDTENTRY_RAW(int80_emulation) +{ + int nr; + + /* Kernel does not use INT $0x80! */ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { int nr = syscall_32_enter(regs); @@ -186,6 +276,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) instrumentation_end(); syscall_exit_to_user_mode(regs); } +#endif /* !CONFIG_IA32_EMULATION */ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 27c05d08558a..de94e2e84ecc 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -275,80 +275,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) - -/* - * 32-bit legacy system call entry. - * - * 32-bit x86 Linux system calls traditionally used the INT $0x80 - * instruction. INT $0x80 lands here. - * - * This entry point can be used by 32-bit and 64-bit programs to perform - * 32-bit system calls. Instances of INT $0x80 can be found inline in - * various programs and libraries. It is also used by the vDSO's - * __kernel_vsyscall fallback for hardware that doesn't support a faster - * entry method. Restarted 32-bit system calls also fall back to INT - * $0x80 regardless of what instruction was originally used to do the - * system call. - * - * This is considered a slow path. It is not used by most libc - * implementations on modern hardware except during process startup. - * - * Arguments: - * eax system call number - * ebx arg1 - * ecx arg2 - * edx arg3 - * esi arg4 - * edi arg5 - * ebp arg6 - */ -SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_ENTRY - ENDBR - /* - * Interrupts are off on entry. - */ - ASM_CLAC /* Do this early to minimize exposure */ - ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV - - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - - /* switch to thread stack expects orig_ax and rdi to be pushed */ - pushq %rax /* pt_regs->orig_ax */ - - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rax - - /* In the Xen PV case we already run on the thread stack. */ - ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - - movq %rsp, %rax - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp - - pushq 5*8(%rax) /* regs->ss */ - pushq 4*8(%rax) /* regs->rsp */ - pushq 3*8(%rax) /* regs->eflags */ - pushq 2*8(%rax) /* regs->cs */ - pushq 1*8(%rax) /* regs->ip */ - pushq 0*8(%rax) /* regs->orig_ax */ -.Lint80_keep_stack: - - PUSH_AND_CLEAR_REGS rax=$-ENOSYS - UNWIND_HINT_REGS - - cld - - IBRS_ENTER - UNTRAIN_RET - - movq %rsp, %rdi - call do_int80_syscall_32 - jmp swapgs_restore_regs_and_return_to_usermode -SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a08f794a0e79..ce1c777227b4 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4660,7 +4660,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) if (pmu->intel_cap.pebs_output_pt_available) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else - pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; intel_pmu_check_event_constraints(pmu->event_constraints, pmu->num_counters, diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 21556ad87f4b..8f3a4d16bb79 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -15,6 +15,7 @@ #include <linux/io.h> #include <asm/apic.h> #include <asm/desc.h> +#include <asm/e820/api.h> #include <asm/sev.h> #include <asm/ibt.h> #include <asm/hypervisor.h> @@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu) static int __init hv_pci_init(void) { - int gen2vm = efi_enabled(EFI_BOOT); + bool gen2vm = efi_enabled(EFI_BOOT); /* - * For Generation-2 VM, we exit from pci_arch_init() by returning 0. - * The purpose is to suppress the harmless warning: + * A Generation-2 VM doesn't support legacy PCI/PCIe, so both + * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() -> + * pcibios_init() doesn't call pcibios_resource_survey() -> + * e820__reserve_resources_late(); as a result, any emulated persistent + * memory of E820_TYPE_PRAM (12) via the kernel parameter + * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be + * detected by register_e820_pmem(). Fix this by directly calling + * e820__reserve_resources_late() here: e820__reserve_resources_late() + * depends on e820__reserve_resources(), which has been called earlier + * from setup_arch(). Note: e820__reserve_resources_late() also adds + * any memory of E820_TYPE_PMEM (7) into iomem_resource, and + * acpi_nfit_register_region() -> acpi_nfit_insert_resource() -> + * region_intersects() returns REGION_INTERSECTS, so the memory of + * E820_TYPE_PMEM won't get added twice. + * + * We return 0 here so that pci_arch_init() won't print the warning: * "PCI: Fatal: No config space access function found" */ - if (gen2vm) + if (gen2vm) { + e820__reserve_resources_late(); return 0; + } /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ return 1; diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index c8a7fc23f63c..f896eed4516c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -16,6 +16,9 @@ #include <asm/x86_init.h> #include <asm/cpufeature.h> #include <asm/irq_vectors.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap) if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_option_idle_override == IDLE_NOMWAIT) *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); + + if (xen_initial_domain()) { + /* + * When Linux is running as Xen dom0, the hypervisor is the + * entity in charge of the processor power management, and so + * Xen needs to check the OS capabilities reported in the + * processor capabilities buffer matches what the hypervisor + * driver supports. + */ + xen_sanitize_proc_cap_bits(cap); + } } static inline bool acpi_has_cpu_in_madt(void) diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 5a2ae24b1204..9805629479d9 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -75,6 +75,11 @@ static inline bool ia32_enabled(void) return __ia32_enabled; } +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + #else /* !CONFIG_IA32_EMULATION */ static inline bool ia32_enabled(void) @@ -82,6 +87,8 @@ static inline bool ia32_enabled(void) return IS_ENABLED(CONFIG_X86_32); } +static inline void ia32_disable(void) {} + #endif #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 05fd175cec7d..13639e57e1f8 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + #ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_64 DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 4d84122bd643..484f4f0131a5 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void); void entry_SYSCALL_compat_safe_stack(void); void entry_SYSRETL_compat_unsafe_stack(void); void entry_SYSRETL_compat_end(void); -void entry_INT80_compat(void); -#ifdef CONFIG_XEN_PV -void xen_entry_INT80_compat(void); -#endif #else /* !CONFIG_IA32_EMULATION */ #define entry_SYSCALL_compat NULL #define entry_SYSENTER_compat NULL diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index fd2669b1cb2d..21f9407be5d3 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -86,9 +86,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); return sys_ni_syscall(); \ } -#define __SYS_NI(abi, name) \ - SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers); - #ifdef CONFIG_X86_64 #define __X64_SYS_STUB0(name) \ __SYS_STUB0(x64, sys_##name) @@ -100,13 +97,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X64_COND_SYSCALL(name) \ __COND_SYSCALL(x64, sys_##name) -#define __X64_SYS_NI(name) \ - __SYS_NI(x64, sys_##name) #else /* CONFIG_X86_64 */ #define __X64_SYS_STUB0(name) #define __X64_SYS_STUBx(x, name, ...) #define __X64_COND_SYSCALL(name) -#define __X64_SYS_NI(name) #endif /* CONFIG_X86_64 */ #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) @@ -120,13 +114,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __IA32_COND_SYSCALL(name) \ __COND_SYSCALL(ia32, sys_##name) -#define __IA32_SYS_NI(name) \ - __SYS_NI(ia32, sys_##name) #else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ #define __IA32_SYS_STUB0(name) #define __IA32_SYS_STUBx(x, name, ...) #define __IA32_COND_SYSCALL(name) -#define __IA32_SYS_NI(name) #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ #ifdef CONFIG_IA32_EMULATION @@ -135,8 +126,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the * ia32 regs in the proper order for shared or "common" syscalls. As some * syscalls may not be implemented, we need to expand COND_SYSCALL in - * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this - * case as well. + * kernel/sys_ni.c to cover this case as well. */ #define __IA32_COMPAT_SYS_STUB0(name) \ __SYS_STUB0(ia32, compat_sys_##name) @@ -148,14 +138,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __IA32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(ia32, compat_sys_##name) -#define __IA32_COMPAT_SYS_NI(name) \ - __SYS_NI(ia32, compat_sys_##name) - #else /* CONFIG_IA32_EMULATION */ #define __IA32_COMPAT_SYS_STUB0(name) #define __IA32_COMPAT_SYS_STUBx(x, name, ...) #define __IA32_COMPAT_COND_SYSCALL(name) -#define __IA32_COMPAT_SYS_NI(name) #endif /* CONFIG_IA32_EMULATION */ @@ -175,13 +161,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(x64, compat_sys_##name) -#define __X32_COMPAT_SYS_NI(name) \ - __SYS_NI(x64, compat_sys_##name) #else /* CONFIG_X86_X32_ABI */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) -#define __X32_COMPAT_SYS_NI(name) #endif /* CONFIG_X86_X32_ABI */ @@ -212,17 +195,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ __IA32_COMPAT_COND_SYSCALL(name) \ __X32_COMPAT_COND_SYSCALL(name) -#define COMPAT_SYS_NI(name) \ - __IA32_COMPAT_SYS_NI(name) \ - __X32_COMPAT_SYS_NI(name) - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -243,8 +221,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not * hurt, we only need to re-define it here to keep the naming congruent to - * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() - * macros to work correctly. + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro + * to work correctly. */ #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ @@ -257,10 +235,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); __X64_COND_SYSCALL(name) \ __IA32_COND_SYSCALL(name) -#define SYS_NI(name) \ - __X64_SYS_NI(name) \ - __IA32_SYS_NI(name) - /* * For VSYSCALLS, we need to declare these three syscalls with the new diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 7048dfacc04b..a9088250770f 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode) enum xen_lazy_mode xen_get_lazy_mode(void); +#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) +void xen_sanitize_proc_cap_bits(uint32_t *buf); +#else +static inline void xen_sanitize_proc_cap_bits(uint32_t *buf) +{ + BUG(); +} +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d0918a75cb00..85a3ce2a3666 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -63,6 +63,7 @@ int acpi_fix_pin2_polarity __initdata; #ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +static bool has_lapic_cpus __initdata; static bool acpi_support_online_capable; #endif @@ -233,6 +234,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) return 0; /* + * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure + * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID + * in x2APIC must be equal or greater than 0xff. + */ + if (has_lapic_cpus && apic_id < 0xff) + return 0; + + /* * We need to register disabled CPU as well to permit * counting disabled CPUs. This allows us to size * cpus_possible_map more accurately, to permit @@ -284,6 +293,7 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); + has_lapic_cpus = true; return 0; } @@ -1114,10 +1124,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) static int __init acpi_parse_madt_lapic_entries(void) { - int count; - int x2count = 0; - int ret; - struct acpi_subtable_proc madt_proc[2]; + int count, x2count = 0; if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; @@ -1126,21 +1133,10 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_parse_sapic, MAX_LOCAL_APIC); if (!count) { - memset(madt_proc, 0, sizeof(madt_proc)); - madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; - madt_proc[0].handler = acpi_parse_lapic; - madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; - madt_proc[1].handler = acpi_parse_x2apic; - ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, - sizeof(struct acpi_table_madt), - madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); - if (ret < 0) { - pr_err("Error parsing LAPIC/X2APIC entries\n"); - return ret; - } - - count = madt_proc[0].count; - x2count = madt_proc[1].count; + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, + acpi_parse_lapic, MAX_LOCAL_APIC); + x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, + acpi_parse_x2apic, MAX_LOCAL_APIC); } if (!count && !x2count) { pr_err("No LAPIC entries present\n"); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 73be3931e4f0..aae7456ece07 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -255,6 +255,16 @@ static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) } } +static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len) +{ + unsigned long flags; + + local_irq_save(flags); + optimize_nops(instr, len); + sync_core(); + local_irq_restore(flags); +} + /* * In this context, "source" is where the instructions are placed in the * section .altinstr_replacement, for example during kernel build by the @@ -438,7 +448,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. */ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - optimize_nops(instr, a->instrlen); + optimize_nops_inplace(instr, a->instrlen); continue; } @@ -1685,8 +1695,8 @@ void __init_or_module text_poke_early(void *addr, const void *opcode, } else { local_irq_save(flags); memcpy(addr, opcode, len); - local_irq_restore(flags); sync_core(); + local_irq_restore(flags); /* * Could also do a CLFLUSH here to speed up CPU recovery; but diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a7eab05e5f29..f322ebd053a9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1320,6 +1320,9 @@ static void zenbleed_check_cpu(void *unused) void amd_check_microcode(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + on_each_cpu(zenbleed_check_cpu, NULL, 1); } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9373ec01c5ae..13b45b9c806d 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -104,8 +104,6 @@ struct cont_desc { size_t size; }; -static u32 ucode_new_rev; - /* * Microcode patch container file is prepended to the initrd in cpio * format. See Documentation/arch/x86/microcode.rst @@ -442,12 +440,11 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * * Returns true if container found (sets @desc), false otherwise. */ -static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) +static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, size_t size) { struct cont_desc desc = { 0 }; struct microcode_amd *mc; bool ret = false; - u32 rev, dummy; desc.cpuid_1_eax = cpuid_1_eax; @@ -457,22 +454,15 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) if (!mc) return ret; - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* * Allow application of the same revision to pick up SMT-specific * changes even if the revision of the other SMT thread is already * up-to-date. */ - if (rev > mc->hdr.patch_id) + if (old_rev > mc->hdr.patch_id) return ret; - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - ret = true; - } - - return ret; + return !__apply_microcode_amd(mc); } static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) @@ -506,9 +496,12 @@ static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpi *ret = cp; } -void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) +void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax) { struct cpio_data cp = { }; + u32 dummy; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy); /* Needed in load_microcode_amd() */ ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax; @@ -517,7 +510,8 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) if (!(cp.data && cp.size)) return; - early_apply_microcode(cpuid_1_eax, cp.data, cp.size); + if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size)) + native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); @@ -625,10 +619,8 @@ void reload_ucode_amd(unsigned int cpu) rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - pr_info("reload patch_level=0x%08x\n", ucode_new_rev); - } + if (!__apply_microcode_amd(mc)) + pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); } } @@ -649,8 +641,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) if (p && (p->patch_id == csig->rev)) uci->mc = p->data; - pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); - return 0; } @@ -691,8 +681,6 @@ static enum ucode_state apply_microcode_amd(int cpu) rev = mc_amd->hdr.patch_id; ret = UCODE_UPDATED; - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); - out: uci->cpu_sig.rev = rev; c->microcode = rev; @@ -935,11 +923,6 @@ struct microcode_ops * __init init_amd_microcode(void) pr_warn("AMD CPU family 0x%x not supported\n", c->x86); return NULL; } - - if (ucode_new_rev) - pr_info_once("microcode updated early to new patch_level=0x%08x\n", - ucode_new_rev); - return µcode_amd_ops; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 666d25bbc5ad..232026a239a6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -41,8 +41,6 @@ #include "internal.h" -#define DRIVER_VERSION "2.2" - static struct microcode_ops *microcode_ops; bool dis_ucode_ldr = true; @@ -77,6 +75,8 @@ static u32 final_levels[] = { 0, /* T-101 terminator */ }; +struct early_load_data early_data; + /* * Check the current patch level on this CPU. * @@ -155,9 +155,9 @@ void __init load_ucode_bsp(void) return; if (intel) - load_ucode_intel_bsp(); + load_ucode_intel_bsp(&early_data); else - load_ucode_amd_bsp(cpuid_1_eax); + load_ucode_amd_bsp(&early_data, cpuid_1_eax); } void load_ucode_ap(void) @@ -828,6 +828,11 @@ static int __init microcode_init(void) if (!microcode_ops) return -ENODEV; + pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev)); + + if (early_data.new_rev) + pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev); + microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); @@ -846,8 +851,6 @@ static int __init microcode_init(void) cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); - pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); - return 0; out_pdev: diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 6024feb98d29..070426b9895f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -339,16 +339,9 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, static enum ucode_state apply_microcode_early(struct ucode_cpu_info *uci) { struct microcode_intel *mc = uci->mc; - enum ucode_state ret; - u32 cur_rev, date; + u32 cur_rev; - ret = __apply_microcode(uci, mc, &cur_rev); - if (ret == UCODE_UPDATED) { - date = mc->hdr.date; - pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n", - cur_rev, mc->hdr.rev, date & 0xffff, date >> 24, (date >> 16) & 0xff); - } - return ret; + return __apply_microcode(uci, mc, &cur_rev); } static __init bool load_builtin_intel_microcode(struct cpio_data *cp) @@ -413,13 +406,17 @@ static int __init save_builtin_microcode(void) early_initcall(save_builtin_microcode); /* Load microcode on BSP from initrd or builtin blobs */ -void __init load_ucode_intel_bsp(void) +void __init load_ucode_intel_bsp(struct early_load_data *ed) { struct ucode_cpu_info uci; + ed->old_rev = intel_get_microcode_revision(); + uci.mc = get_microcode_blob(&uci, false); if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED) ucode_patch_va = UCODE_BSP_LOADED; + + ed->new_rev = uci.cpu_sig.rev; } void load_ucode_intel_ap(void) diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index f8047b12329a..21776c529fa9 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -37,6 +37,12 @@ struct microcode_ops { use_nmi : 1; }; +struct early_load_data { + u32 old_rev; + u32 new_rev; +}; + +extern struct early_load_data early_data; extern struct ucode_cpu_info ucode_cpu_info[]; struct cpio_data find_microcode_in_initrd(const char *path); @@ -92,14 +98,14 @@ extern bool dis_ucode_ldr; extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD -void load_ucode_amd_bsp(unsigned int family); +void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family); void load_ucode_amd_ap(unsigned int family); int save_microcode_in_initrd_amd(unsigned int family); void reload_ucode_amd(unsigned int cpu); struct microcode_ops *init_amd_microcode(void); void exit_amd_microcode(void); #else /* CONFIG_CPU_SUP_AMD */ -static inline void load_ucode_amd_bsp(unsigned int family) { } +static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { } static inline void load_ucode_amd_ap(unsigned int family) { } static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } static inline void reload_ucode_amd(unsigned int cpu) { } @@ -108,12 +114,12 @@ static inline void exit_amd_microcode(void) { } #endif /* !CONFIG_CPU_SUP_AMD */ #ifdef CONFIG_CPU_SUP_INTEL -void load_ucode_intel_bsp(void); +void load_ucode_intel_bsp(struct early_load_data *ed); void load_ucode_intel_ap(void); void reload_ucode_intel(void); struct microcode_ops *init_intel_microcode(void); #else /* CONFIG_CPU_SUP_INTEL */ -static inline void load_ucode_intel_bsp(void) { } +static inline void load_ucode_intel_bsp(struct early_load_data *ed) { } static inline void load_ucode_intel_ap(void) { } static inline void reload_ucode_intel(void) { } static inline struct microcode_ops *init_intel_microcode(void) { return NULL; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e6bba12c759c..01fa06dd06b6 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -262,11 +262,14 @@ static uint32_t __init ms_hyperv_platform(void) static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) { static atomic_t nmi_cpu = ATOMIC_INIT(-1); + unsigned int old_cpu, this_cpu; if (!unknown_nmi_panic) return NMI_DONE; - if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1) + old_cpu = -1; + this_cpu = raw_smp_processor_id(); + if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu)) return NMI_HANDLED; return NMI_DONE; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 086a2c3aaaa0..0f8103240fda 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -255,6 +255,22 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) testl $X2APIC_ENABLE, %eax jnz .Lread_apicid_msr +#ifdef CONFIG_X86_X2APIC + /* + * If system is in X2APIC mode then MMIO base might not be + * mapped causing the MMIO read below to fault. Faults can't + * be handled at that point. + */ + cmpl $0, x2apic_mode(%rip) + jz .Lread_apicid_mmio + + /* Force the AP into X2APIC mode. */ + orl $X2APIC_ENABLE, %eax + wrmsr + jmp .Lread_apicid_msr +#endif + +.Lread_apicid_mmio: /* Read the APIC ID from the fix-mapped MMIO space. */ movq apic_mmio_base(%rip), %rcx addq $APIC_ID, %rcx diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8857abc706e4..660b601f1d6c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = { static const struct idt_data ia32_idt[] __initconst = { #if defined(CONFIG_IA32_EMULATION) - SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), + SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation), #elif defined(CONFIG_X86_32) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), #endif diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 70472eebe719..c67285824e82 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1234,10 +1234,6 @@ void setup_ghcb(void) if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) return; - /* First make sure the hypervisor talks a supported protocol. */ - if (!sev_es_negotiate_protocol()) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); - /* * Check whether the runtime #VC exception handler is active. It uses * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). @@ -1255,6 +1251,13 @@ void setup_ghcb(void) } /* + * Make sure the hypervisor talks a supported protocol. + * This gets called only in the BSP boot phase. + */ + if (!sev_es_negotiate_protocol()) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); + + /* * Clear the boot_ghcb. The first exception comes in before the bss * section is cleared. */ diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index cacf2ede6217..23d8aaf8d9fd 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp); uc_flags = frame_uc_flags(regs); - if (setup_signal_shadow_stack(ksig)) - return -EFAULT; - if (!user_access_begin(frame, sizeof(*frame))) return -EFAULT; @@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) return -EFAULT; } + if (setup_signal_shadow_stack(ksig)) + return -EFAULT; + /* Set up registers for signal handler */ regs->di = ksig->sig; /* In case the signal handler was declared without prototypes */ diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 34f2f47cadf2..b9d35b6b19f7 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -23,17 +23,15 @@ config KVM depends on HAVE_KVM depends on HIGH_RES_TIMERS depends on X86_LOCAL_APIC - select PREEMPT_NOTIFIERS + select KVM_COMMON select KVM_GENERIC_MMU_NOTIFIER select HAVE_KVM_IRQCHIP select HAVE_KVM_PFNCACHE - select HAVE_KVM_IRQFD select HAVE_KVM_DIRTY_RING_TSO select HAVE_KVM_DIRTY_RING_ACQ_REL select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_IRQ_ROUTING - select HAVE_KVM_EVENTFD select KVM_ASYNC_PF select USER_RETURN_NOTIFIER select KVM_MMIO @@ -46,7 +44,6 @@ config KVM select KVM_XFER_TO_GUEST_WORK select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO - select INTERVAL_TREE select HAVE_KVM_PM_NOTIFIER if PM select KVM_GENERIC_HARDWARE_ENABLING help @@ -80,7 +77,7 @@ config KVM_WERROR config KVM_SW_PROTECTED_VM bool "Enable support for KVM software-protected VMs" depends on EXPERT - depends on X86_64 + depends on KVM && X86_64 select KVM_GENERIC_PRIVATE_MEM help Enable support for KVM software-protected VMs. Currently "protected" diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1b278a3f0689..cab20216921b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -475,7 +475,7 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, return -E2BIG; if (cpuid->nent) { - e = vmemdup_user(entries, array_size(sizeof(*e), cpuid->nent)); + e = vmemdup_array_user(entries, cpuid->nent, sizeof(*e)); if (IS_ERR(e)) return PTR_ERR(e); @@ -519,7 +519,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, return -E2BIG; if (cpuid->nent) { - e2 = vmemdup_user(entries, array_size(sizeof(*e2), cpuid->nent)); + e2 = vmemdup_array_user(entries, cpuid->nent, sizeof(*e2)); if (IS_ERR(e2)) return PTR_ERR(e2); } diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 42026b3f3ff3..95ea1a1f7403 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -182,6 +182,7 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file) } static const struct file_operations mmu_rmaps_stat_fops = { + .owner = THIS_MODULE, .open = kvm_mmu_rmaps_stat_open, .read = seq_read, .llseek = seq_lseek, diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 4900c078045a..6ee925d66648 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2972,6 +2972,25 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); } + + /* + * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if + * the host/guest supports its use. + * + * guest_can_use() checks a number of requirements on the host/guest to + * ensure that MSR_IA32_XSS is available, but it might report true even + * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host + * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better + * to further check that the guest CPUID actually supports + * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved + * guests will still get intercepted and caught in the normal + * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths. + */ + if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1); + else + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0); } void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 712146312358..a8bd4e909a1e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -103,6 +103,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_IA32_XSS, .always = false }, { .index = MSR_EFER, .always = false }, { .index = MSR_IA32_CR_PAT, .always = false }, { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, @@ -1855,15 +1856,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) bool old_paging = is_paging(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { + if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; - svm->vmcb->save.efer |= EFER_LMA | EFER_LME; + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer |= EFER_LMA | EFER_LME; } if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { vcpu->arch.efer &= ~EFER_LMA; - svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); } } #endif diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 59adff7bbf55..8ef95139cd24 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -30,7 +30,7 @@ #define IOPM_SIZE PAGE_SIZE * 3 #define MSRPM_SIZE PAGE_SIZE * 2 -#define MAX_DIRECT_ACCESS_MSRS 46 +#define MAX_DIRECT_ACCESS_MSRS 47 #define MSRPM_OFFSETS 32 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 598b057611e0..b6fca43cb455 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5546,8 +5546,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, - sizeof(guest_xsave->region)); + kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, @@ -13081,7 +13081,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_state_protected) return true; - return vcpu->arch.preempted_in_kernel; + if (vcpu != kvm_get_running_vcpu()) + return vcpu->arch.preempted_in_kernel; + + return static_call(kvm_x86_get_cpl)(vcpu) == 0; } unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index a68f2dda0948..70b91de2e053 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -32,6 +32,7 @@ #include <asm/msr.h> #include <asm/cmdline.h> #include <asm/sev.h> +#include <asm/ia32.h> #include "mm_internal.h" @@ -481,6 +482,16 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ES_ENABLED) x86_cpuinit.parallel_bringup = false; + + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + if (sev_status & MSR_AMD64_SEV_ENABLED) + ia32_disable(); } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c10d9abc239..e89e415aa743 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp #endif WARN(1, "verification of programs using bpf_throw should have failed\n"); } + +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + u8 *old_addr, *new_addr, *old_bypass_addr; + int ret; + + old_bypass_addr = old ? NULL : poke->bypass_addr; + old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; + new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; + + /* + * On program loading or teardown, the program's kallsym entry + * might not be in place, so we use __bpf_arch_text_poke to skip + * the kallsyms check. + */ + if (new) { + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, new_addr); + BUG_ON(ret < 0); + if (!old) { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + poke->bypass_addr, + NULL); + BUG_ON(ret < 0); + } + } else { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + old_bypass_addr, + poke->bypass_addr); + BUG_ON(ret < 0); + /* let other CPUs finish the execution of program + * so that it will not possible to expose them + * to invalid nop, stack unwind, nop state + */ + if (!ret) + synchronize_rcu(); + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, NULL); + BUG_ON(ret < 0); + } +} diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 9b1ec5d8c99c..a65fc2ae15b4 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -9,6 +9,7 @@ config XEN select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR depends on X86_64 || (X86_32 && X86_PAE) + depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8) depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0337392a3121..3c61bb98c10e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,9 +33,12 @@ EXPORT_SYMBOL_GPL(hypercall_page); * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. + * Make sure that xen_vcpu_info doesn't cross a page boundary by making it + * cache-line aligned (the struct is guaranteed to have a size of 64 bytes, + * which matches the cache line size of 64-bit x86 processors). */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ DEFINE_PER_CPU(uint32_t, xen_vcpu_id); @@ -160,6 +163,7 @@ void xen_vcpu_setup(int cpu) int err; struct vcpu_info *vcpup; + BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES); BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bbbfdd495ebd..aeb33e0a3f76 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION - { entry_INT80_compat, xen_entry_INT80_compat, false }, + TRAP_ENTRY(int80_emulation, false ), #endif TRAP_ENTRY(exc_page_fault, false ), TRAP_ENTRY(exc_divide_error, false ), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 9e5e68008785..1a9cd18dfbd3 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat +xen_pv_trap asm_int80_emulation #endif xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 408a2aa66c69..a87ab36889e7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -21,7 +21,7 @@ extern void *xen_initial_gdt; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DECLARE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); |