diff options
Diffstat (limited to 'arch')
307 files changed, 3459 insertions, 2341 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 18c842ca6c32..186e785f5b56 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -496,3 +496,5 @@ 564 common futex_wake sys_futex_wake 565 common futex_wait sys_futex_wait 566 common futex_requeue sys_futex_requeue +567 common statmount sys_statmount +568 common listmount sys_listmount diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 3162db540ee9..1b0483c51cc1 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -49,7 +49,6 @@ config ARC select OF select OF_EARLY_FLATTREE select PCI_SYSCALL if PCI - select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select TRACE_IRQFLAGS_SUPPORT @@ -232,10 +231,6 @@ config ARC_CACHE_PAGES Note that Global I/D ENABLE + Per Page DISABLE works but corollary Global DISABLE + Per Page ENABLE won't work -config ARC_CACHE_VIPT_ALIASING - bool "Support VIPT Aliasing D$" - depends on ARC_HAS_DCACHE && ISA_ARCOMPACT - endif #ARC_CACHE config ARC_HAS_ICCM diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index bd5b1a9a0544..563af3e75f01 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -44,31 +44,10 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz); #define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */ -#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING - #define flush_cache_mm(mm) /* called on munmap/exit */ #define flush_cache_range(mm, u_vstart, u_vend) #define flush_cache_page(vma, u_vaddr, pfn) /* PF handling/COW-break */ -#else /* VIPT aliasing dcache */ - -/* To clear out stale userspace mappings */ -void flush_cache_mm(struct mm_struct *mm); -void flush_cache_range(struct vm_area_struct *vma, - unsigned long start,unsigned long end); -void flush_cache_page(struct vm_area_struct *vma, - unsigned long user_addr, unsigned long page); - -/* - * To make sure that userspace mapping is flushed to memory before - * get_user_pages() uses a kernel mapping to access the page - */ -#define ARCH_HAS_FLUSH_ANON_PAGE -void flush_anon_page(struct vm_area_struct *vma, - struct page *page, unsigned long u_vaddr); - -#endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */ - /* * A new pagecache page has PG_arch_1 clear - thus dcache dirty by default * This works around some PIO based drivers which don't call flush_dcache_page @@ -76,28 +55,6 @@ void flush_anon_page(struct vm_area_struct *vma, */ #define PG_dc_clean PG_arch_1 -#define CACHE_COLORS_NUM 4 -#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1) -#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK) - -/* - * Simple wrapper over config option - * Bootup code ensures that hardware matches kernel configuration - */ -static inline int cache_is_vipt_aliasing(void) -{ - return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); -} - -/* - * checks if two addresses (after page aligning) index into same cache set - */ -#define addr_not_cache_congruent(addr1, addr2) \ -({ \ - cache_is_vipt_aliasing() ? \ - (CACHE_COLOR(addr1) != CACHE_COLOR(addr2)) : 0; \ -}) - #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ memcpy(dst, src, len); \ diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 4d13320e0c1b..3802a2daaf86 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -291,4 +291,36 @@ /* M = 8-1 N = 8 */ .endm +.macro SAVE_ABI_CALLEE_REGS + push r13 + push r14 + push r15 + push r16 + push r17 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 +.endm + +.macro RESTORE_ABI_CALLEE_REGS + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 +.endm + #endif diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index a0e760eb35a8..92c3e9f13252 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -33,6 +33,91 @@ #include <asm/irqflags-compact.h> #include <asm/thread_info.h> /* For THREAD_SIZE */ +/* Note on the LD/ST addr modes with addr reg wback + * + * LD.a same as LD.aw + * + * LD.a reg1, [reg2, x] => Pre Incr + * Eff Addr for load = [reg2 + x] + * + * LD.ab reg1, [reg2, x] => Post Incr + * Eff Addr for load = [reg2] + */ + +.macro PUSHAX aux + lr r9, [\aux] + push r9 +.endm + +.macro POPAX aux + pop r9 + sr r9, [\aux] +.endm + +.macro SAVE_R0_TO_R12 + push r0 + push r1 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 +.endm + +.macro RESTORE_R12_TO_R0 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r1 + pop r0 +.endm + +.macro SAVE_ABI_CALLEE_REGS + push r13 + push r14 + push r15 + push r16 + push r17 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 +.endm + +.macro RESTORE_ABI_CALLEE_REGS + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 +.endm + /*-------------------------------------------------------------- * Switch to Kernel Mode stack if SP points to User Mode stack * @@ -235,7 +320,7 @@ SWITCH_TO_KERNEL_STK - PUSH 0x003\LVL\()abcd /* Dummy ECR */ + st.a 0x003\LVL\()abcd, [sp, -4] /* Dummy ECR */ sub sp, sp, 8 /* skip orig_r0 (not needed) skip pt_regs->sp, already saved above */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 49c2e090cb5c..cf1ba376e992 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -21,114 +21,12 @@ #include <asm/entry-arcv2.h> #endif -/* Note on the LD/ST addr modes with addr reg wback - * - * LD.a same as LD.aw - * - * LD.a reg1, [reg2, x] => Pre Incr - * Eff Addr for load = [reg2 + x] - * - * LD.ab reg1, [reg2, x] => Post Incr - * Eff Addr for load = [reg2] - */ - -.macro PUSH reg - st.a \reg, [sp, -4] -.endm - -.macro PUSHAX aux - lr r9, [\aux] - PUSH r9 -.endm - -.macro POP reg - ld.ab \reg, [sp, 4] -.endm - -.macro POPAX aux - POP r9 - sr r9, [\aux] -.endm - -/*-------------------------------------------------------------- - * Helpers to save/restore Scratch Regs: - * used by Interrupt/Exception Prologue/Epilogue - *-------------------------------------------------------------*/ -.macro SAVE_R0_TO_R12 - PUSH r0 - PUSH r1 - PUSH r2 - PUSH r3 - PUSH r4 - PUSH r5 - PUSH r6 - PUSH r7 - PUSH r8 - PUSH r9 - PUSH r10 - PUSH r11 - PUSH r12 -.endm - -.macro RESTORE_R12_TO_R0 - POP r12 - POP r11 - POP r10 - POP r9 - POP r8 - POP r7 - POP r6 - POP r5 - POP r4 - POP r3 - POP r2 - POP r1 - POP r0 - -.endm - -/*-------------------------------------------------------------- - * Helpers to save/restore callee-saved regs: - * used by several macros below - *-------------------------------------------------------------*/ -.macro SAVE_R13_TO_R25 - PUSH r13 - PUSH r14 - PUSH r15 - PUSH r16 - PUSH r17 - PUSH r18 - PUSH r19 - PUSH r20 - PUSH r21 - PUSH r22 - PUSH r23 - PUSH r24 - PUSH r25 -.endm - -.macro RESTORE_R25_TO_R13 - POP r25 - POP r24 - POP r23 - POP r22 - POP r21 - POP r20 - POP r19 - POP r18 - POP r17 - POP r16 - POP r15 - POP r14 - POP r13 -.endm - /* * save user mode callee regs as struct callee_regs * - needed by fork/do_signal/unaligned-access-emulation. */ .macro SAVE_CALLEE_SAVED_USER - SAVE_R13_TO_R25 + SAVE_ABI_CALLEE_REGS .endm /* @@ -136,18 +34,18 @@ * - could have been changed by ptrace tracer or unaligned-access fixup */ .macro RESTORE_CALLEE_SAVED_USER - RESTORE_R25_TO_R13 + RESTORE_ABI_CALLEE_REGS .endm /* * save/restore kernel mode callee regs at the time of context switch */ .macro SAVE_CALLEE_SAVED_KERNEL - SAVE_R13_TO_R25 + SAVE_ABI_CALLEE_REGS .endm .macro RESTORE_CALLEE_SAVED_KERNEL - RESTORE_R25_TO_R13 + RESTORE_ABI_CALLEE_REGS .endm /*-------------------------------------------------------------- diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index ef8d4166370c..8a2441670a8f 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -10,6 +10,13 @@ #include <linux/types.h> #include <asm-generic/pgtable-nopmd.h> +/* + * Hugetlb definitions. + */ +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 4a2b30fb5a98..00b9318e551e 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -54,6 +54,10 @@ struct pt_regs { ecr_reg ecr; }; +struct callee_regs { + unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; +}; + #define MAX_REG_OFFSET offsetof(struct pt_regs, ecr) #else @@ -92,16 +96,14 @@ struct pt_regs { unsigned long status32; }; -#define MAX_REG_OFFSET offsetof(struct pt_regs, status32) - -#endif - -/* Callee saved registers - need to be saved only when you are scheduled out */ - struct callee_regs { unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; +#define MAX_REG_OFFSET offsetof(struct pt_regs, status32) + +#endif + #define instruction_pointer(regs) ((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 4dcf8589b708..d08a5092c2b4 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -153,7 +153,7 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) { int n = 0; #ifdef CONFIG_ISA_ARCV2 - const char *release, *cpu_nm, *isa_nm = "ARCv2"; + const char *release = "", *cpu_nm = "HS38", *isa_nm = "ARCv2"; int dual_issue = 0, dual_enb = 0, mpy_opt, present; int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk; char mpy_nm[16], lpb_nm[32]; @@ -172,8 +172,6 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) * releases only update it. */ - cpu_nm = "HS38"; - if (info->arcver > 0x50 && info->arcver <= 0x53) { release = arc_hs_rel[info->arcver - 0x51].str; } else { diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 0b3bb529d246..8f6f4a542964 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -62,7 +62,7 @@ struct rt_sigframe { unsigned int sigret_magic; }; -static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +static int save_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs) { int err = 0; #ifndef CONFIG_ISA_ARCOMPACT @@ -75,12 +75,12 @@ static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) #else v2abi.r58 = v2abi.r59 = 0; #endif - err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); + err = __copy_to_user(&mctx->v2abi, (void const *)&v2abi, sizeof(v2abi)); #endif return err; } -static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +static int restore_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs) { int err = 0; #ifndef CONFIG_ISA_ARCOMPACT diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index f7e05c146637..9106ceac323c 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -145,10 +145,9 @@ dc_chk: p_dc->sz_k = 1 << (dbcr.sz - 1); n += scnprintf(buf + n, len - n, - "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", + "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s\n", p_dc->sz_k, assoc, p_dc->line_len, vipt ? "VIPT" : "PIPT", - p_dc->colors > 1 ? " aliasing" : "", IS_USED_CFG(CONFIG_ARC_HAS_DCACHE)); slc_chk: @@ -703,51 +702,10 @@ static inline void arc_slc_enable(void) * Exported APIs */ -/* - * Handle cache congruency of kernel and userspace mappings of page when kernel - * writes-to/reads-from - * - * The idea is to defer flushing of kernel mapping after a WRITE, possible if: - * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent - * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) - * -In SMP, if hardware caches are coherent - * - * There's a corollary case, where kernel READs from a userspace mapped page. - * If the U-mapping is not congruent to K-mapping, former needs flushing. - */ void flush_dcache_folio(struct folio *folio) { - struct address_space *mapping; - - if (!cache_is_vipt_aliasing()) { - clear_bit(PG_dc_clean, &folio->flags); - return; - } - - /* don't handle anon pages here */ - mapping = folio_flush_mapping(folio); - if (!mapping) - return; - - /* - * pagecache page, file not yet mapped to userspace - * Make a note that K-mapping is dirty - */ - if (!mapping_mapped(mapping)) { - clear_bit(PG_dc_clean, &folio->flags); - } else if (folio_mapped(folio)) { - /* kernel reading from page with U-mapping */ - phys_addr_t paddr = (unsigned long)folio_address(folio); - unsigned long vaddr = folio_pos(folio); - - /* - * vaddr is not actually the virtual address, but is - * congruent to every user mapping. - */ - if (addr_not_cache_congruent(paddr, vaddr)) - __flush_dcache_pages(paddr, vaddr, - folio_nr_pages(folio)); - } + clear_bit(PG_dc_clean, &folio->flags); + return; } EXPORT_SYMBOL(flush_dcache_folio); @@ -921,44 +879,6 @@ noinline void flush_cache_all(void) } -#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING - -void flush_cache_mm(struct mm_struct *mm) -{ - flush_cache_all(); -} - -void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, - unsigned long pfn) -{ - phys_addr_t paddr = pfn << PAGE_SHIFT; - - u_vaddr &= PAGE_MASK; - - __flush_dcache_pages(paddr, u_vaddr, 1); - - if (vma->vm_flags & VM_EXEC) - __inv_icache_pages(paddr, u_vaddr, 1); -} - -void flush_cache_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_cache_all(); -} - -void flush_anon_page(struct vm_area_struct *vma, struct page *page, - unsigned long u_vaddr) -{ - /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_pages((phys_addr_t)page_address(page), u_vaddr, 1); - __flush_dcache_pages((phys_addr_t)page_address(page), - (phys_addr_t)page_address(page), 1); - -} - -#endif - void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { @@ -966,46 +886,11 @@ void copy_user_highpage(struct page *to, struct page *from, struct folio *dst = page_folio(to); void *kfrom = kmap_atomic(from); void *kto = kmap_atomic(to); - int clean_src_k_mappings = 0; - - /* - * If SRC page was already mapped in userspace AND it's U-mapping is - * not congruent with K-mapping, sync former to physical page so that - * K-mapping in memcpy below, sees the right data - * - * Note that while @u_vaddr refers to DST page's userspace vaddr, it is - * equally valid for SRC page as well - * - * For !VIPT cache, all of this gets compiled out as - * addr_not_cache_congruent() is 0 - */ - if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_pages((unsigned long)kfrom, u_vaddr, 1); - clean_src_k_mappings = 1; - } copy_page(kto, kfrom); - /* - * Mark DST page K-mapping as dirty for a later finalization by - * update_mmu_cache(). Although the finalization could have been done - * here as well (given that both vaddr/paddr are available). - * But update_mmu_cache() already has code to do that for other - * non copied user pages (e.g. read faults which wire in pagecache page - * directly). - */ clear_bit(PG_dc_clean, &dst->flags); - - /* - * if SRC was already usermapped and non-congruent to kernel mapping - * sync the kernel mapping back to physical page - */ - if (clean_src_k_mappings) { - __flush_dcache_pages((unsigned long)kfrom, - (unsigned long)kfrom, 1); - } else { - clear_bit(PG_dc_clean, &src->flags); - } + clear_bit(PG_dc_clean, &src->flags); kunmap_atomic(kto); kunmap_atomic(kfrom); @@ -1140,17 +1025,8 @@ static noinline void __init arc_cache_init_master(void) dc->line_len, L1_CACHE_BYTES); /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ - if (is_isa_arcompact()) { - int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - - if (dc->colors > 1) { - if (!handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - if (CACHE_COLORS_NUM != dc->colors) - panic("CACHE_COLORS_NUM not optimized for config\n"); - } else if (handled && dc->colors == 1) { - panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - } + if (is_isa_arcompact() && dc->colors > 1) { + panic("Aliasing VIPT cache not supported\n"); } } diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index fce5fa2b4f52..3c1c7ae73292 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -14,10 +14,6 @@ #include <asm/cacheflush.h> -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ - (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) - /* * Ensure that shared mappings are correctly aligned to * avoid aliasing issues with VIPT caches. @@ -31,21 +27,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - int do_align = 0; - int aliasing = cache_is_vipt_aliasing(); struct vm_unmapped_area_info info; /* - * We only need to do colour alignment if D cache aliases. - */ - if (aliasing) - do_align = filp || (flags & MAP_SHARED); - - /* * We enforce the MAP_FIXED case. */ if (flags & MAP_FIXED) { - if (aliasing && flags & MAP_SHARED && + if (flags & MAP_SHARED && (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; @@ -55,10 +43,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return -ENOMEM; if (addr) { - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); + addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && @@ -70,7 +55,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.length = len; info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; - info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; return vm_unmapped_area(&info); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index e536b2dcd4b0..ad702b49aeb3 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -478,21 +478,15 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, create_tlb(vma, vaddr, ptep); - if (page == ZERO_PAGE(0)) { + if (page == ZERO_PAGE(0)) return; - } /* - * Exec page : Independent of aliasing/page-color considerations, - * since icache doesn't snoop dcache on ARC, any dirty - * K-mapping of a code page needs to be wback+inv so that - * icache fetch by userspace sees code correctly. - * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it - * so userspace sees the right data. - * (Avoids the flush for Non-exec + congruent mapping case) + * For executable pages, since icache doesn't snoop dcache, any + * dirty K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. */ - if ((vma->vm_flags & VM_EXEC) || - addr_not_cache_congruent(paddr, vaddr)) { + if (vma->vm_flags & VM_EXEC) { struct folio *folio = page_folio(page); int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags); if (dirty) { diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts index 1ab8184302db..5a2869a18bd5 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts @@ -36,9 +36,7 @@ gpios = <&gpio 42 GPIO_ACTIVE_HIGH>; }; -&leds { - /delete-node/ led_act; -}; +/delete-node/ &led_act; &pm { /delete-property/ system-power-controller; diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts index a3f247c722b4..0342a79ccd5d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts @@ -37,9 +37,9 @@ &clks { assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, <&clks IMX6QDL_CLK_ENET_REF_SEL>; assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>; + <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, <&clk50m_phy>; }; &hdmi { diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi index 4ffe99ed55ca..07dcecbe485d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi @@ -121,6 +121,8 @@ max-speed = <100>; interrupt-parent = <&gpio5>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; }; }; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi index 29b8fd03567a..5387da8a2a0a 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi @@ -454,7 +454,7 @@ }; gpt1: timer@302d0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302d0000 0x10000>; interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_GPT1_ROOT_CLK>, @@ -463,7 +463,7 @@ }; gpt2: timer@302e0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302e0000 0x10000>; interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_GPT2_ROOT_CLK>, @@ -473,7 +473,7 @@ }; gpt3: timer@302f0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302f0000 0x10000>; interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_GPT3_ROOT_CLK>, @@ -483,7 +483,7 @@ }; gpt4: timer@30300000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x30300000 0x10000>; interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_GPT4_ROOT_CLK>, diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts index a400c108f66a..6c5e6856648a 100644 --- a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts +++ b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts @@ -8,6 +8,7 @@ #include "imx28-lwe.dtsi" / { + model = "Liebherr XEA board"; compatible = "lwn,imx28-xea", "fsl,imx28"; }; diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index 7bf557c99561..01edf244ddee 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -848,7 +848,7 @@ }; sdmmc_pwren: sdmmc-pwren { - rockchip,pins = <1 RK_PB6 1 &pcfg_pull_default>; + rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_default>; }; sdmmc_bus4: sdmmc-bus4 { diff --git a/arch/arm/boot/dts/rockchip/rk322x.dtsi b/arch/arm/boot/dts/rockchip/rk322x.dtsi index ffc16d6b97e1..a721744cbfd1 100644 --- a/arch/arm/boot/dts/rockchip/rk322x.dtsi +++ b/arch/arm/boot/dts/rockchip/rk322x.dtsi @@ -215,9 +215,9 @@ power-domain@RK3228_PD_VOP { reg = <RK3228_PD_VOP>; - clocks =<&cru ACLK_VOP>, - <&cru DCLK_VOP>, - <&cru HCLK_VOP>; + clocks = <&cru ACLK_VOP>, + <&cru DCLK_VOP>, + <&cru HCLK_VOP>; pm_qos = <&qos_vop>; #power-domain-cells = <0>; }; diff --git a/arch/arm/boot/dts/ti/omap/am33xx.dtsi b/arch/arm/boot/dts/ti/omap/am33xx.dtsi index 1a2cd5baf402..5b9e01a8aa5d 100644 --- a/arch/arm/boot/dts/ti/omap/am33xx.dtsi +++ b/arch/arm/boot/dts/ti/omap/am33xx.dtsi @@ -359,6 +359,7 @@ <SYSC_IDLE_NO>, <SYSC_IDLE_SMART>, <SYSC_IDLE_SMART_WKUP>; + ti,sysc-delay-us = <2>; clocks = <&l3s_clkctrl AM3_L3S_USB_OTG_HS_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/ti/omap/dra7.dtsi b/arch/arm/boot/dts/ti/omap/dra7.dtsi index 3f3e52e3b375..6509c742fb58 100644 --- a/arch/arm/boot/dts/ti/omap/dra7.dtsi +++ b/arch/arm/boot/dts/ti/omap/dra7.dtsi @@ -147,7 +147,7 @@ l3-noc@44000000 { compatible = "ti,dra7-l3-noc"; - reg = <0x44000000 0x1000>, + reg = <0x44000000 0x1000000>, <0x45000000 0x1000>; interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, <&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index e62832dcba76..a8287e7ab9d4 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -2,8 +2,6 @@ #ifndef _ARM_KEXEC_H #define _ARM_KEXEC_H -#ifdef CONFIG_KEXEC - /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ @@ -82,6 +80,4 @@ static inline struct page *boot_pfn_to_page(unsigned long boot_pfn) #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ - #endif /* _ARM_KEXEC_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index d53f56d6f840..771264d4726a 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -59,7 +59,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o # Main staffs in KPROBES are in arch/arm/probes/ . obj-$(CONFIG_KPROBES) += patch.o insn.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 2157493b78a9..df69af932375 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -501,6 +501,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "mmdc%d", ret); + if (!name) { + ret = -ENOMEM; + goto pmu_release_id; + } pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data; @@ -523,9 +527,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b pmu_register_err: pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret); - ida_simple_remove(&mmdc_ida, pmu_mmdc->id); cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); hrtimer_cancel(&pmu_mmdc->hrtimer); +pmu_release_id: + ida_simple_remove(&mmdc_ida, pmu_mmdc->id); pmu_free: kfree(pmu_mmdc); return ret; diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 98999aa8cc0c..7f387706368a 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -793,11 +793,16 @@ void __init omap_soc_device_init(void) soc_dev_attr->machine = soc_name; soc_dev_attr->family = omap_get_family(); + if (!soc_dev_attr->family) { + kfree(soc_dev_attr); + return; + } soc_dev_attr->revision = soc_rev; soc_dev_attr->custom_attr_group = omap_soc_groups[0]; soc_dev = soc_device_register(soc_dev_attr); if (IS_ERR(soc_dev)) { + kfree(soc_dev_attr->family); kfree(soc_dev_attr); return; } diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c index cb63921232a6..277f6aa8e6c2 100644 --- a/arch/arm/mach-sunxi/mc_smp.c +++ b/arch/arm/mach-sunxi/mc_smp.c @@ -803,16 +803,16 @@ static int __init sunxi_mc_smp_init(void) for (i = 0; i < ARRAY_SIZE(sunxi_mc_smp_data); i++) { ret = of_property_match_string(node, "enable-method", sunxi_mc_smp_data[i].enable_method); - if (!ret) + if (ret >= 0) break; } - is_a83t = sunxi_mc_smp_data[i].is_a83t; - of_node_put(node); - if (ret) + if (ret < 0) return -ENODEV; + is_a83t = sunxi_mc_smp_data[i].is_a83t; + if (!sunxi_mc_smp_cpu_table_init()) return -EINVAL; diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 584f9528c996..d6a324dbff2e 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -470,3 +470,5 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi index 15290e6892fc..fc7315b94406 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi @@ -68,10 +68,7 @@ &emac0 { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; - allwinner,rx-delay-ps = <3100>; - allwinner,tx-delay-ps = <700>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts index d83852e72f06..b5d713926a34 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts @@ -13,6 +13,9 @@ }; &emac0 { + allwinner,rx-delay-ps = <3100>; + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii"; phy-supply = <®_dcdce>; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts index 00fe28caac93..b3b1b8692125 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts @@ -13,6 +13,8 @@ }; &emac0 { + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii-rxid"; phy-supply = <®_dldo1>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi index 5ce5fbf2b38e..f69b0c17560a 100644 --- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi @@ -82,12 +82,9 @@ pinctrl-0 = <&pinctrl_wifi_pdn>; gpio = <&lsio_gpio1 28 GPIO_ACTIVE_HIGH>; enable-active-high; + regulator-always-on; regulator-name = "wifi_pwrdn_fake_regulator"; regulator-settling-time-us = <100>; - - regulator-state-mem { - regulator-off-in-suspend; - }; }; reg_pcie_switch: regulator-pcie-switch { diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index ce66d30a4839..b0bb77150adc 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -149,7 +149,7 @@ dma_subsys: bus@5a000000 { clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; power-domains = <&pd IMX_SC_R_LCD_0_PWM_0>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 49ad3413db94..7e510b21bbac 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -29,7 +29,7 @@ lsio_subsys: bus@5d000000 { <&pwm0_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; @@ -42,7 +42,7 @@ lsio_subsys: bus@5d000000 { <&pwm1_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; @@ -55,7 +55,7 @@ lsio_subsys: bus@5d000000 { <&pwm2_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; @@ -68,7 +68,7 @@ lsio_subsys: bus@5d000000 { <&pwm3_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index c9a610ba4836..1264da6012f9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -2072,6 +2072,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; @@ -2114,6 +2115,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 4b1ce9fc1758..c6dc3ba0d43b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -1649,6 +1649,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg1>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; @@ -1680,6 +1681,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg2>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi index 01539df335f8..8439dd6b3935 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi @@ -96,6 +96,17 @@ status = "okay"; }; +&edma3 { + power-domains = <&pd IMX_SC_R_DMA_1_CH0>, + <&pd IMX_SC_R_DMA_1_CH1>, + <&pd IMX_SC_R_DMA_1_CH2>, + <&pd IMX_SC_R_DMA_1_CH3>, + <&pd IMX_SC_R_DMA_1_CH4>, + <&pd IMX_SC_R_DMA_1_CH5>, + <&pd IMX_SC_R_DMA_1_CH6>, + <&pd IMX_SC_R_DMA_1_CH7>; +}; + &flexcan1 { fsl,clk-source = /bits/ 8 <1>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index f22c1ac391c9..c4a0082f30d3 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -483,7 +483,7 @@ }; }; - gpioe: gpio@2d000080 { + gpioe: gpio@2d000000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2d000000 0x1000>; gpio-controller; @@ -498,7 +498,7 @@ gpio-ranges = <&iomuxc1 0 32 24>; }; - gpiof: gpio@2d010080 { + gpiof: gpio@2d010000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2d010000 0x1000>; gpio-controller; @@ -534,7 +534,7 @@ }; }; - gpiod: gpio@2e200080 { + gpiod: gpio@2e200000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2e200000 0x1000>; gpio-controller; diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts index f06139bdff97..3c5c67ebee5d 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts @@ -577,7 +577,7 @@ fsl,pins = < MX93_PAD_UART2_TXD__LPUART2_TX 0x31e MX93_PAD_UART2_RXD__LPUART2_RX 0x31e - MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x31e + MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x51e >; }; diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index ceccf4766440..34c0540276d1 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -417,7 +417,7 @@ compatible = "fsl,imx93-src-slice"; reg = <0x44462400 0x400>, <0x44465800 0x400>; #power-domain-cells = <0>; - clocks = <&clk IMX93_CLK_MEDIA_AXI>, + clocks = <&clk IMX93_CLK_NIC_MEDIA_GATE>, <&clk IMX93_CLK_MEDIA_APB>; }; }; @@ -957,7 +957,7 @@ }; }; - gpio2: gpio@43810080 { + gpio2: gpio@43810000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43810000 0x1000>; gpio-controller; @@ -972,7 +972,7 @@ gpio-ranges = <&iomuxc 0 4 30>; }; - gpio3: gpio@43820080 { + gpio3: gpio@43820000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43820000 0x1000>; gpio-controller; @@ -988,7 +988,7 @@ <&iomuxc 26 34 2>, <&iomuxc 28 0 4>; }; - gpio4: gpio@43830080 { + gpio4: gpio@43830000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43830000 0x1000>; gpio-controller; @@ -1003,7 +1003,7 @@ gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>; }; - gpio1: gpio@47400080 { + gpio1: gpio@47400000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x47400000 0x1000>; gpio-controller; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 3b7a176b7904..c46682150e50 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -73,7 +73,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index a885a3fbe456..2dc1bdc74e21 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -55,7 +55,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x20000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index af4a4309bda4..b876e501216b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -126,6 +126,7 @@ compatible = "sff,sfp"; i2c-bus = <&i2c_sfp1>; los-gpios = <&pio 46 GPIO_ACTIVE_HIGH>; + maximum-power-milliwatt = <3000>; mod-def0-gpios = <&pio 49 GPIO_ACTIVE_LOW>; tx-disable-gpios = <&pio 20 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 7 GPIO_ACTIVE_HIGH>; @@ -137,6 +138,7 @@ i2c-bus = <&i2c_sfp2>; los-gpios = <&pio 31 GPIO_ACTIVE_HIGH>; mod-def0-gpios = <&pio 47 GPIO_ACTIVE_LOW>; + maximum-power-milliwatt = <3000>; tx-disable-gpios = <&pio 15 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 48 GPIO_ACTIVE_HIGH>; }; @@ -150,16 +152,16 @@ trip = <&cpu_trip_active_high>; }; - cpu-active-low { + cpu-active-med { /* active: set fan to cooling level 1 */ cooling-device = <&fan 1 1>; - trip = <&cpu_trip_active_low>; + trip = <&cpu_trip_active_med>; }; - cpu-passive { - /* passive: set fan to cooling level 0 */ + cpu-active-low { + /* active: set fan to cooling level 0 */ cooling-device = <&fan 0 0>; - trip = <&cpu_trip_passive>; + trip = <&cpu_trip_active_low>; }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 24eda00e320d..fc751e049953 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -374,6 +374,10 @@ reg = <0 0x11230000 0 0x1000>, <0 0x11c20000 0 0x1000>; interrupts = <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>; + assigned-clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, + <&topckgen CLK_TOP_EMMC_250M_SEL>; + assigned-clock-parents = <&apmixedsys CLK_APMIXED_MPLL>, + <&topckgen CLK_TOP_NET1PLL_D5_D2>; clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, <&infracfg CLK_INFRA_MSDC_HCK_CK>, <&infracfg CLK_INFRA_MSDC_CK>, @@ -610,22 +614,34 @@ thermal-sensors = <&thermal 0>; trips { + cpu_trip_crit: crit { + temperature = <125000>; + hysteresis = <2000>; + type = "critical"; + }; + + cpu_trip_hot: hot { + temperature = <120000>; + hysteresis = <2000>; + type = "hot"; + }; + cpu_trip_active_high: active-high { temperature = <115000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_active_low: active-low { + cpu_trip_active_med: active-med { temperature = <85000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_passive: passive { - temperature = <40000>; + cpu_trip_active_low: active-low { + temperature = <60000>; hysteresis = <2000>; - type = "passive"; + type = "active"; }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 5122963d8743..d258c80213b2 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -44,7 +44,7 @@ id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>; }; - usb_p1_vbus: regulator@0 { + usb_p1_vbus: regulator-usb-p1 { compatible = "regulator-fixed"; regulator-name = "usb_vbus"; regulator-min-microvolt = <5000000>; @@ -53,7 +53,7 @@ enable-active-high; }; - usb_p0_vbus: regulator@1 { + usb_p0_vbus: regulator-usb-p0 { compatible = "regulator-fixed"; regulator-name = "vbus"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index ce336a48c897..77f9ab94c00b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -31,14 +31,14 @@ #address-cells = <2>; #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; }; }; - ntc@0 { + thermal-sensor { compatible = "murata,ncp03wf104"; pullup-uv = <1800000>; pullup-ohm = <390000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index bf97b60ae4d1..820260348de9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -91,6 +91,8 @@ &dsi0 { status = "okay"; + /delete-property/#size-cells; + /delete-property/#address-cells; /delete-node/panel@0; ports { port { @@ -441,20 +443,20 @@ }; touchscreen_pins: touchscreen-pins { - touch_int_odl { + touch-int-odl { pinmux = <PINMUX_GPIO155__FUNC_GPIO155>; input-enable; bias-pull-up; }; - touch_rst_l { + touch-rst-l { pinmux = <PINMUX_GPIO156__FUNC_GPIO156>; output-high; }; }; trackpad_pins: trackpad-pins { - trackpad_int { + trackpad-int { pinmux = <PINMUX_GPIO7__FUNC_GPIO7>; input-enable; bias-disable; /* pulled externally */ diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index bf7de35ffcbc..7881a27be029 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -116,7 +116,7 @@ #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; @@ -460,7 +460,7 @@ &pio { aud_pins_default: audiopins { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO97__FUNC_I2S2_MCK>, <PINMUX_GPIO98__FUNC_I2S2_BCK>, <PINMUX_GPIO101__FUNC_I2S2_LRCK>, @@ -482,7 +482,7 @@ }; aud_pins_tdm_out_on: audiotdmouton { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO169__FUNC_TDM_BCK_2ND>, <PINMUX_GPIO170__FUNC_TDM_LRCK_2ND>, <PINMUX_GPIO171__FUNC_TDM_DATA0_2ND>, @@ -494,7 +494,7 @@ }; aud_pins_tdm_out_off: audiotdmoutoff { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO169__FUNC_GPIO169>, <PINMUX_GPIO170__FUNC_GPIO170>, <PINMUX_GPIO171__FUNC_GPIO171>, @@ -508,13 +508,13 @@ }; bt_pins: bt-pins { - pins_bt_en { + pins-bt-en { pinmux = <PINMUX_GPIO120__FUNC_GPIO120>; output-low; }; }; - ec_ap_int_odl: ec_ap_int_odl { + ec_ap_int_odl: ec-ap-int-odl { pins1 { pinmux = <PINMUX_GPIO151__FUNC_GPIO151>; input-enable; @@ -522,7 +522,7 @@ }; }; - h1_int_od_l: h1_int_od_l { + h1_int_od_l: h1-int-od-l { pins1 { pinmux = <PINMUX_GPIO153__FUNC_GPIO153>; input-enable; @@ -530,7 +530,7 @@ }; i2c0_pins: i2c0 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO82__FUNC_SDA0>, <PINMUX_GPIO83__FUNC_SCL0>; mediatek,pull-up-adv = <3>; @@ -539,7 +539,7 @@ }; i2c1_pins: i2c1 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO81__FUNC_SDA1>, <PINMUX_GPIO84__FUNC_SCL1>; mediatek,pull-up-adv = <3>; @@ -548,7 +548,7 @@ }; i2c2_pins: i2c2 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO103__FUNC_SCL2>, <PINMUX_GPIO104__FUNC_SDA2>; bias-disable; @@ -557,7 +557,7 @@ }; i2c3_pins: i2c3 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO50__FUNC_SCL3>, <PINMUX_GPIO51__FUNC_SDA3>; mediatek,pull-up-adv = <3>; @@ -566,7 +566,7 @@ }; i2c4_pins: i2c4 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO105__FUNC_SCL4>, <PINMUX_GPIO106__FUNC_SDA4>; bias-disable; @@ -575,7 +575,7 @@ }; i2c5_pins: i2c5 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO48__FUNC_SCL5>, <PINMUX_GPIO49__FUNC_SDA5>; mediatek,pull-up-adv = <3>; @@ -584,7 +584,7 @@ }; i2c6_pins: i2c6 { - pins_bus { + pins-bus { pinmux = <PINMUX_GPIO11__FUNC_SCL6>, <PINMUX_GPIO12__FUNC_SDA6>; bias-disable; @@ -592,7 +592,7 @@ }; mmc0_pins_default: mmc0-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>, <PINMUX_GPIO128__FUNC_MSDC0_DAT1>, <PINMUX_GPIO125__FUNC_MSDC0_DAT2>, @@ -607,13 +607,13 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-down-adv = <01>; @@ -621,7 +621,7 @@ }; mmc0_pins_uhs: mmc0-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = <PINMUX_GPIO123__FUNC_MSDC0_DAT0>, <PINMUX_GPIO128__FUNC_MSDC0_DAT1>, <PINMUX_GPIO125__FUNC_MSDC0_DAT2>, @@ -636,19 +636,19 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = <PINMUX_GPIO124__FUNC_MSDC0_CLK>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-down-adv = <10>; }; - pins_ds { + pins-ds { pinmux = <PINMUX_GPIO131__FUNC_MSDC0_DSL>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = <PINMUX_GPIO133__FUNC_MSDC0_RSTB>; drive-strength = <MTK_DRIVE_14mA>; mediatek,pull-up-adv = <01>; @@ -656,7 +656,7 @@ }; mmc1_pins_default: mmc1-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>, <PINMUX_GPIO32__FUNC_MSDC1_DAT0>, <PINMUX_GPIO34__FUNC_MSDC1_DAT1>, @@ -666,7 +666,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>; input-enable; mediatek,pull-down-adv = <10>; @@ -674,7 +674,7 @@ }; mmc1_pins_uhs: mmc1-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = <PINMUX_GPIO31__FUNC_MSDC1_CMD>, <PINMUX_GPIO32__FUNC_MSDC1_DAT0>, <PINMUX_GPIO34__FUNC_MSDC1_DAT1>, @@ -685,7 +685,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = <PINMUX_GPIO29__FUNC_MSDC1_CLK>; drive-strength = <MTK_DRIVE_8mA>; mediatek,pull-down-adv = <10>; @@ -693,15 +693,15 @@ }; }; - panel_pins_default: panel_pins_default { - panel_reset { + panel_pins_default: panel-pins-default { + panel-reset { pinmux = <PINMUX_GPIO45__FUNC_GPIO45>; output-low; bias-pull-up; }; }; - pwm0_pin_default: pwm0_pin_default { + pwm0_pin_default: pwm0-pin-default { pins1 { pinmux = <PINMUX_GPIO176__FUNC_GPIO176>; output-high; @@ -713,14 +713,14 @@ }; scp_pins: scp { - pins_scp_uart { + pins-scp-uart { pinmux = <PINMUX_GPIO110__FUNC_TP_URXD1_AO>, <PINMUX_GPIO112__FUNC_TP_UTXD1_AO>; }; }; spi0_pins: spi0 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO85__FUNC_SPI0_MI>, <PINMUX_GPIO86__FUNC_GPIO86>, <PINMUX_GPIO87__FUNC_SPI0_MO>, @@ -730,7 +730,7 @@ }; spi1_pins: spi1 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO161__FUNC_SPI1_A_MI>, <PINMUX_GPIO162__FUNC_SPI1_A_CSB>, <PINMUX_GPIO163__FUNC_SPI1_A_MO>, @@ -740,20 +740,20 @@ }; spi2_pins: spi2 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO0__FUNC_SPI2_CSB>, <PINMUX_GPIO1__FUNC_SPI2_MO>, <PINMUX_GPIO2__FUNC_SPI2_CLK>; bias-disable; }; - pins_spi_mi { + pins-spi-mi { pinmux = <PINMUX_GPIO94__FUNC_SPI2_MI>; mediatek,pull-down-adv = <00>; }; }; spi3_pins: spi3 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO21__FUNC_SPI3_MI>, <PINMUX_GPIO22__FUNC_SPI3_CSB>, <PINMUX_GPIO23__FUNC_SPI3_MO>, @@ -763,7 +763,7 @@ }; spi4_pins: spi4 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO17__FUNC_SPI4_MI>, <PINMUX_GPIO18__FUNC_SPI4_CSB>, <PINMUX_GPIO19__FUNC_SPI4_MO>, @@ -773,7 +773,7 @@ }; spi5_pins: spi5 { - pins_spi { + pins-spi { pinmux = <PINMUX_GPIO13__FUNC_SPI5_MI>, <PINMUX_GPIO14__FUNC_SPI5_CSB>, <PINMUX_GPIO15__FUNC_SPI5_MO>, @@ -783,63 +783,63 @@ }; uart0_pins_default: uart0-pins-default { - pins_rx { + pins-rx { pinmux = <PINMUX_GPIO95__FUNC_URXD0>; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = <PINMUX_GPIO96__FUNC_UTXD0>; }; }; uart1_pins_default: uart1-pins-default { - pins_rx { + pins-rx { pinmux = <PINMUX_GPIO121__FUNC_URXD1>; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = <PINMUX_GPIO115__FUNC_UTXD1>; }; - pins_rts { + pins-rts { pinmux = <PINMUX_GPIO47__FUNC_URTS1>; output-enable; }; - pins_cts { + pins-cts { pinmux = <PINMUX_GPIO46__FUNC_UCTS1>; input-enable; }; }; uart1_pins_sleep: uart1-pins-sleep { - pins_rx { + pins-rx { pinmux = <PINMUX_GPIO121__FUNC_GPIO121>; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = <PINMUX_GPIO115__FUNC_UTXD1>; }; - pins_rts { + pins-rts { pinmux = <PINMUX_GPIO47__FUNC_URTS1>; output-enable; }; - pins_cts { + pins-cts { pinmux = <PINMUX_GPIO46__FUNC_UCTS1>; input-enable; }; }; wifi_pins_pwrseq: wifi-pins-pwrseq { - pins_wifi_enable { + pins-wifi-enable { pinmux = <PINMUX_GPIO119__FUNC_GPIO119>; output-low; }; }; wifi_pins_wakeup: wifi-pins-wakeup { - pins_wifi_wakeup { + pins-wifi-wakeup { pinmux = <PINMUX_GPIO113__FUNC_GPIO113>; input-enable; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 5169779d01df..976dc968b3ca 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1210,127 +1210,6 @@ nvmem-cell-names = "calibration-data"; }; - thermal_zones: thermal-zones { - cpu_thermal: cpu-thermal { - polling-delay-passive = <100>; - polling-delay = <500>; - thermal-sensors = <&thermal 0>; - sustainable-power = <5000>; - - trips { - threshold: trip-point0 { - temperature = <68000>; - hysteresis = <2000>; - type = "passive"; - }; - - target: trip-point1 { - temperature = <80000>; - hysteresis = <2000>; - type = "passive"; - }; - - cpu_crit: cpu-crit { - temperature = <115000>; - hysteresis = <2000>; - type = "critical"; - }; - }; - - cooling-maps { - map0 { - trip = <&target>; - cooling-device = <&cpu0 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu1 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu2 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu3 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <3072>; - }; - map1 { - trip = <&target>; - cooling-device = <&cpu4 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu5 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu6 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu7 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <1024>; - }; - }; - }; - - /* The tzts1 ~ tzts6 don't need to polling */ - /* The tzts1 ~ tzts6 don't need to thermal throttle */ - - tzts1: tzts1 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 1>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts2: tzts2 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 2>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts3: tzts3 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 3>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts4: tzts4 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 4>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts5: tzts5 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 5>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tztsABB: tztsABB { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 6>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - }; - pwm0: pwm@1100e000 { compatible = "mediatek,mt8183-disp-pwm"; reg = <0 0x1100e000 0 0x1000>; @@ -2105,4 +1984,125 @@ power-domains = <&spm MT8183_POWER_DOMAIN_CAM>; }; }; + + thermal_zones: thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <100>; + polling-delay = <500>; + thermal-sensors = <&thermal 0>; + sustainable-power = <5000>; + + trips { + threshold: trip-point0 { + temperature = <68000>; + hysteresis = <2000>; + type = "passive"; + }; + + target: trip-point1 { + temperature = <80000>; + hysteresis = <2000>; + type = "passive"; + }; + + cpu_crit: cpu-crit { + temperature = <115000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&target>; + cooling-device = <&cpu0 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu1 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu2 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu3 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <3072>; + }; + map1 { + trip = <&target>; + cooling-device = <&cpu4 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu5 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu6 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu7 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <1024>; + }; + }; + }; + + /* The tzts1 ~ tzts6 don't need to polling */ + /* The tzts1 ~ tzts6 don't need to thermal throttle */ + + tzts1: tzts1 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 1>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts2: tzts2 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 2>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts3: tzts3 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 3>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts4: tzts4 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 4>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts5: tzts5 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 5>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tztsABB: tztsABB { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 6>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index f04ae70c470a..df0c04f2ba1d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -924,7 +924,8 @@ reg = <MT8186_POWER_DOMAIN_CSIRX_TOP>; clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>; - clock-names = "csirx_top0", "csirx_top1"; + clock-names = "subsys-csirx-top0", + "subsys-csirx-top1"; #power-domain-cells = <0>; }; @@ -942,7 +943,8 @@ reg = <MT8186_POWER_DOMAIN_ADSP_AO>; clocks = <&topckgen CLK_TOP_AUDIODSP>, <&topckgen CLK_TOP_ADSP_BUS>; - clock-names = "audioadsp", "adsp_bus"; + clock-names = "audioadsp", + "subsys-adsp-bus"; #address-cells = <1>; #size-cells = <0>; #power-domain-cells = <1>; @@ -975,8 +977,11 @@ <&mmsys CLK_MM_SMI_COMMON>, <&mmsys CLK_MM_SMI_GALS>, <&mmsys CLK_MM_SMI_IOMMU>; - clock-names = "disp", "mdp", "smi_infra", "smi_common", - "smi_gals", "smi_iommu"; + clock-names = "disp", "mdp", + "subsys-smi-infra", + "subsys-smi-common", + "subsys-smi-gals", + "subsys-smi-iommu"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -993,15 +998,17 @@ power-domain@MT8186_POWER_DOMAIN_CAM { reg = <MT8186_POWER_DOMAIN_CAM>; - clocks = <&topckgen CLK_TOP_CAM>, - <&topckgen CLK_TOP_SENINF>, + clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>, <&topckgen CLK_TOP_SENINF2>, <&topckgen CLK_TOP_SENINF3>, + <&camsys CLK_CAM2MM_GALS>, <&topckgen CLK_TOP_CAMTM>, - <&camsys CLK_CAM2MM_GALS>; - clock-names = "cam-top", "cam0", "cam1", "cam2", - "cam3", "cam-tm", "gals"; + <&topckgen CLK_TOP_CAM>; + clock-names = "cam0", "cam1", "cam2", + "cam3", "gals", + "subsys-cam-tm", + "subsys-cam-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1020,9 +1027,9 @@ power-domain@MT8186_POWER_DOMAIN_IMG { reg = <MT8186_POWER_DOMAIN_IMG>; - clocks = <&topckgen CLK_TOP_IMG1>, - <&imgsys1 CLK_IMG1_GALS_IMG1>; - clock-names = "img-top", "gals"; + clocks = <&imgsys1 CLK_IMG1_GALS_IMG1>, + <&topckgen CLK_TOP_IMG1>; + clock-names = "gals", "subsys-img-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1041,8 +1048,11 @@ <&ipesys CLK_IPE_LARB20>, <&ipesys CLK_IPE_SMI_SUBCOM>, <&ipesys CLK_IPE_GALS_IPE>; - clock-names = "ipe-top", "ipe-larb0", "ipe-larb1", - "ipe-smi", "ipe-gals"; + clock-names = "subsys-ipe-top", + "subsys-ipe-larb0", + "subsys-ipe-larb1", + "subsys-ipe-smi", + "subsys-ipe-gals"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -1061,7 +1071,9 @@ clocks = <&topckgen CLK_TOP_WPE>, <&wpesys CLK_WPE_SMI_LARB8_CK_EN>, <&wpesys CLK_WPE_SMI_LARB8_PCLK_EN>; - clock-names = "wpe0", "larb-ck", "larb-pclk"; + clock-names = "wpe0", + "subsys-larb-ck", + "subsys-larb-pclk"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -1656,7 +1668,7 @@ #address-cells = <1>; #size-cells = <1>; - gpu_speedbin: gpu-speed-bin@59c { + gpu_speedbin: gpu-speedbin@59c { reg = <0x59c 0x4>; bits = <0 3>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index dd5b89b73190..5a7cab489ff3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -389,7 +389,7 @@ pinctrl-0 = <&i2c7_pins>; pmic@34 { - #interrupt-cells = <1>; + #interrupt-cells = <2>; compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 54c674c45b49..e0ac2e9f5b72 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -627,6 +627,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { reg = <MT8195_POWER_DOMAIN_VENC_CORE1>; + clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; + clock-names = "venc1-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -689,6 +691,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC { reg = <MT8195_POWER_DOMAIN_VENC>; + clocks = <&vencsys CLK_VENC_LARB>; + clock-names = "venc0-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -2665,7 +2669,7 @@ reg = <0 0x1b010000 0 0x1000>; mediatek,larb-id = <20>; mediatek,smi = <&smi_common_vpp>; - clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>, + clocks = <&vencsys_core1 CLK_VENC_CORE1_VENC>, <&vencsys_core1 CLK_VENC_CORE1_GALS>, <&vppsys0 CLK_VPP0_GALS_VDO0_VDO1_VENCSYS_CORE1>; clock-names = "apb", "smi", "gals"; diff --git a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts index 70b465f7c6a7..00ac59a873e8 100644 --- a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts +++ b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts @@ -238,6 +238,7 @@ mt6360: pmic@34 { compatible = "mediatek,mt6360"; reg = <0x34>; + interrupt-parent = <&pio>; interrupts = <128 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; interrupt-controller; diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index de0a1f2af983..7d4c5324c61b 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -86,7 +86,7 @@ sgtl5000_clk: sgtl5000-oscillator { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <24576000>; + clock-frequency = <24576000>; }; dc_12v: dc-12v-regulator { diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index e729e7a22b23..cc8209795c3e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -668,7 +668,7 @@ vdec: video-codec@ff360000 { compatible = "rockchip,rk3328-vdec", "rockchip,rk3399-vdec"; - reg = <0x0 0xff360000 0x0 0x400>; + reg = <0x0 0xff360000 0x0 0x480>; interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>, <&cru SCLK_VDEC_CABAC>, <&cru SCLK_VDEC_CORE>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi index 5c1929d41cc0..cacbad35cfc8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi @@ -509,8 +509,7 @@ ap_i2c_tp: &i2c5 { &pci_rootport { mvl_wifi: wifi@0,0 { compatible = "pci1b4b,2b42"; - reg = <0x83010000 0x0 0x00000000 0x0 0x00100000 - 0x83010000 0x0 0x00100000 0x0 0x00100000>; + reg = <0x0000 0x0 0x0 0x0 0x0>; interrupt-parent = <&gpio0>; interrupts = <8 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts index 853e88455e75..9e4b12ed62cb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts @@ -34,8 +34,8 @@ &pci_rootport { wifi@0,0 { compatible = "qcom,ath10k"; - reg = <0x00010000 0x0 0x00000000 0x0 0x00000000>, - <0x03010010 0x0 0x00000000 0x0 0x00200000>; + reg = <0x00000000 0x0 0x00000000 0x0 0x00000000>, + <0x03000010 0x0 0x00000000 0x0 0x00200000>; qcom,ath10k-calibration-variant = "GO_DUMO"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index c9bf1d5c3a42..789fd0dcc88b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -489,6 +489,7 @@ ap_i2c_audio: &i2c8 { #address-cells = <3>; #size-cells = <2>; ranges; + device_type = "pci"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index faf02e59d6c7..da0dfb237f85 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1109,7 +1109,9 @@ power-domain@RK3399_PD_VDU { reg = <RK3399_PD_VDU>; clocks = <&cru ACLK_VDU>, - <&cru HCLK_VDU>; + <&cru HCLK_VDU>, + <&cru SCLK_VDU_CA>, + <&cru SCLK_VDU_CORE>; pm_qos = <&qos_video_m1_r>, <&qos_video_m1_w>; #power-domain-cells = <0>; @@ -1384,7 +1386,7 @@ vdec: video-codec@ff660000 { compatible = "rockchip,rk3399-vdec"; - reg = <0x0 0xff660000 0x0 0x400>; + reg = <0x0 0xff660000 0x0 0x480>; interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH 0>; clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>, <&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index 0964761e3ce9..c19c0f1b3778 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -977,7 +977,7 @@ <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "sys", "pmc", "msi", "legacy", "err"; + interrupt-names = "sys", "pmc", "msg", "legacy", "err"; bus-range = <0x0 0xf>; clocks = <&cru ACLK_PCIE20_MST>, <&cru ACLK_PCIE20_SLV>, <&cru ACLK_PCIE20_DBI>, <&cru PCLK_PCIE20>, diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index 9570b34aca2e..d88c0e852356 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -235,13 +235,13 @@ &pinctrl { fan { fan_int: fan-int { - rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>; }; }; hym8563 { hym8563_int: hym8563-int { - rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts index 8f399c4317bd..e3a839a12dc6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts @@ -38,7 +38,7 @@ leds { compatible = "gpio-leds"; pinctrl-names = "default"; - pinctrl-0 =<&leds_gpio>; + pinctrl-0 = <&leds_gpio>; led-1 { gpios = <&gpio1 RK_PA2 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi index 63151d9d2377..30db12c4fc82 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi @@ -369,7 +369,7 @@ emmc_data_strobe: emmc-data-strobe { rockchip,pins = /* emmc_data_strobe */ - <2 RK_PA2 1 &pcfg_pull_none>; + <2 RK_PA2 1 &pcfg_pull_down>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 7064c0e9179f..8aa0499f9b03 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -1362,7 +1362,6 @@ <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH 0>, <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH 0>, <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>; - interrupt-names = "ch0", "ch1", "ch2", "ch3"; rockchip,pmu = <&pmu1grf>; }; diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index d977713ec0ba..abb57bc54305 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -44,9 +44,6 @@ return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -81,6 +78,5 @@ } asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused); -#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 9f7c1bf99526..8a191423c316 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -919,6 +919,10 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake) __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_statmount 457 +__SYSCALL(__NR_statmount, sys_statmount) +#define __NR_listmount 458 +__SYSCALL(__NR_listmount, sys_listmount) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e5f75f1f1085..4796104c4471 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -410,7 +410,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); - + kvm_vgic_vcpu_destroy(vcpu); kvm_arm_vcpu_destroy(vcpu); } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index c8c3cb812783..e949e1d0fd9f 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -368,7 +368,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) vgic_v4_teardown(kvm); } -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -379,29 +379,39 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_flush_pending_lpis(vcpu); INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_unregister_redist_iodev(vcpu); + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + } } -static void __kvm_vgic_destroy(struct kvm *kvm) +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->slots_lock); + __kvm_vgic_vcpu_destroy(vcpu); + mutex_unlock(&kvm->slots_lock); +} + +void kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; - lockdep_assert_held(&kvm->arch.config_lock); + mutex_lock(&kvm->slots_lock); vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_destroy(vcpu); + __kvm_vgic_vcpu_destroy(vcpu); + + mutex_lock(&kvm->arch.config_lock); kvm_vgic_dist_destroy(kvm); -} -void kvm_vgic_destroy(struct kvm *kvm) -{ - mutex_lock(&kvm->arch.config_lock); - __kvm_vgic_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); } /** @@ -469,25 +479,26 @@ int kvm_vgic_map_resources(struct kvm *kvm) type = VGIC_V3; } - if (ret) { - __kvm_vgic_destroy(kvm); + if (ret) goto out; - } + dist->ready = true; dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); ret = vgic_register_dist_iodev(kvm, dist_base, type); - if (ret) { + if (ret) kvm_err("Unable to register VGIC dist MMIO regions\n"); - kvm_vgic_destroy(kvm); - } - mutex_unlock(&kvm->slots_lock); - return ret; + goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); +out_slots: mutex_unlock(&kvm->slots_lock); + + if (ret) + kvm_vgic_destroy(kvm); + return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 89117ba2528a..a764b0ab8bf9 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -820,7 +820,7 @@ out_unlock: return ret; } -static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) { struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; @@ -833,6 +833,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) unsigned long c; int ret = 0; + lockdep_assert_held(&kvm->slots_lock); + kvm_for_each_vcpu(c, vcpu, kvm) { ret = vgic_register_redist_iodev(vcpu); if (ret) diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 339a55194b2c..74a67ad87f29 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -436,6 +436,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, if (ret) goto out; + /* Silently exit if the vLPI is already mapped */ + if (irq->hw) + goto out; + /* * Emit the mapping request. If it fails, the ITS probably * isn't v4 compatible, so let's silently bail out. Holding diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0ab09b0d4440..8d134569d0a1 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -241,6 +241,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); int vgic_v3_save_pending_tables(struct kvm *kvm); int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu); bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 460d799e1296..55f6455a8284 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -607,6 +607,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + mm_flags |= FAULT_FLAG_TRIED; /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 204b94b2e6aa..4ba8d67ddb09 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -83,7 +83,7 @@ endif ifeq ($(CONFIG_RELOCATABLE),y) KBUILD_CFLAGS_KERNEL += -fPIE -LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext +LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs) endif cflags-y += $(call cc-option, -mno-check-zero-division) diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 091897d40b03..91d81f9730ab 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -32,6 +32,6 @@ static inline unsigned long efi_get_kimg_min_align(void) #define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS) -unsigned long kernel_entry_address(void); +unsigned long kernel_entry_address(unsigned long kernel_addr); #endif /* _ASM_LOONGARCH_EFI_H */ diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h index b9a4ab54285c..9b16a3b8e706 100644 --- a/arch/loongarch/include/asm/elf.h +++ b/arch/loongarch/include/asm/elf.h @@ -293,7 +293,7 @@ extern const char *__elf_platform; #define ELF_PLAT_INIT(_r, load_addr) do { \ _r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \ _r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \ - _r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \ + _r->regs[9] = _r->regs[10] /* syscall n */ = _r->regs[12] = 0; \ _r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \ _r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \ _r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 9b4957cefa8a..46366e783c84 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1098,12 +1098,11 @@ static __always_inline u64 drdtime(void) { - int rID = 0; u64 val = 0; __asm__ __volatile__( - "rdtime.d %0, %1 \n\t" - : "=r"(val), "=r"(rID) + "rdtime.d %0, $zero\n\t" + : "=r"(val) : ); return val; diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4fcc168f0732..3c808c680370 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 92270f14db94..f623feb2129f 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -32,7 +32,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, } for (unwind_start(&state, task, regs); - !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { + !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || !consume_entry(cookie, addr)) break; diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c index ba324ba76fa1..a463d6961344 100644 --- a/arch/loongarch/kernel/unwind.c +++ b/arch/loongarch/kernel/unwind.c @@ -28,6 +28,5 @@ bool default_next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); - state->error = true; return false; } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 55afc27320e1..929ae240280a 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -227,7 +227,7 @@ static bool next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); out: - state->error = true; + state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 169ff8b3915e..4fcd6cd6da23 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -480,10 +480,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case 8: move_reg(ctx, t1, src); emit_insn(ctx, extwb, dst, t1); + emit_zext_32(ctx, dst, is32); break; case 16: move_reg(ctx, t1, src); emit_insn(ctx, extwh, dst, t1); + emit_zext_32(ctx, dst, is32); break; case 32: emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO); @@ -772,8 +774,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext break; case 32: emit_insn(ctx, revb2w, dst, dst); - /* zero-extend 32 bits into 64 bits */ - emit_zext_32(ctx, dst, is32); + /* clear the upper 32 bits */ + emit_zext_32(ctx, dst, true); break; case 64: emit_insn(ctx, revbd, dst, dst); @@ -911,8 +913,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* function return */ case BPF_JMP | BPF_EXIT: - emit_sext_32(ctx, regmap[BPF_REG_0], true); - if (i == ctx->prog->len - 1) break; @@ -988,14 +988,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; case BPF_DW: - if (is_signed_imm12(off)) { - emit_insn(ctx, ldd, dst, src, off); - } else if (is_signed_imm14(off)) { - emit_insn(ctx, ldptrd, dst, src, off); - } else { - move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxd, dst, src, t1); - } + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxd, dst, src, t1); break; } diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 7e6b74b6eecd..b4d71fea558f 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -453,6 +453,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -550,7 +551,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 0b403e2efcd5..682d8cd3dd3c 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -410,6 +410,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -507,7 +508,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 57aac3f4b001..15259ced8463 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -430,6 +430,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -527,7 +528,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 3c160636a2e9..7395c12caef6 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -402,6 +402,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -499,7 +500,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 23cf07c49d14..92506bc7f78d 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -412,6 +412,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -509,7 +510,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 619a0d93ce5b..144bc8c0d8b5 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -269,9 +269,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=m -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y CONFIG_6LOWPAN=m CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m CONFIG_6LOWPAN_GHC_UDP=m @@ -432,6 +429,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -529,7 +527,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index d9430bc2b2de..07594c729497 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -289,9 +289,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_DEV_APPLETALK=m -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y CONFIG_6LOWPAN=m CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m CONFIG_6LOWPAN_GHC_UDP=m @@ -518,6 +515,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -615,7 +613,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index eb6132f29bf5..c34de6c1de20 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -401,6 +401,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -498,7 +499,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index d0bad674cbb7..83bc029d1f33 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -402,6 +402,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -499,7 +500,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index dad6bcfcaeed..4f551dac2ed7 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -419,6 +419,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -516,7 +517,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index eb1b489b3139..b1bf01182bd3 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -400,6 +400,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -497,7 +498,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 939589826546..5c9a3f71f036 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -400,6 +400,7 @@ CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set +CONFIG_BCACHEFS_FS=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=m @@ -497,7 +498,6 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m diff --git a/arch/m68k/include/asm/kexec.h b/arch/m68k/include/asm/kexec.h index f5a8b2defa4b..3b0b64f0a353 100644 --- a/arch/m68k/include/asm/kexec.h +++ b/arch/m68k/include/asm/kexec.h @@ -2,7 +2,7 @@ #ifndef _ASM_M68K_KEXEC_H #define _ASM_M68K_KEXEC_H -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) @@ -25,6 +25,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* _ASM_M68K_KEXEC_H */ diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index 01fb69a5095f..f335bf3268a1 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_PCI) += pcibios.o obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o obj-$(CONFIG_UBOOT) += uboot.o diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 7a4b780e82cb..37db1a810b67 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -456,3 +456,5 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 5b6a0b02b7de..07fff5ad1c9c 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -462,3 +462,5 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 76db82542519..797ae590ebdb 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -460,6 +460,7 @@ config MACH_LOONGSON2EF config MACH_LOONGSON64 bool "Loongson 64-bit family of machines" + select ARCH_DMA_DEFAULT_COHERENT select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO @@ -1251,6 +1252,7 @@ config CPU_LOONGSON64 select CPU_SUPPORTS_MSA select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT select CPU_MIPSR2_IRQ_VI + select DMA_NONCOHERENT select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_ASID_BITS_VARIABLE diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index f878f47e4501..ee3e2153dd13 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -130,8 +130,7 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass0c0320", - "pciclass0c03", - "loongson, pci-gmac"; + "pciclass0c03"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_LOW>, diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi index 7c69e8245c2f..cce9428afc41 100644 --- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi +++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi @@ -193,8 +193,7 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass020000", - "pciclass0200", - "loongson, pci-gmac"; + "pciclass0200"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 33c09688210f..08ea2cde1eb5 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -422,7 +422,7 @@ static const struct plat_smp_ops octeon_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -502,7 +502,7 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h index d6d5fa5cc31d..69e579e41e66 100644 --- a/arch/mips/include/asm/kexec.h +++ b/arch/mips/include/asm/kexec.h @@ -31,7 +31,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs, prepare_frametrace(newregs); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE struct kimage; extern unsigned long kexec_args[4]; extern int (*_machine_kexec_prepare)(struct kimage *); diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 035b1a69e2d0..e007edd6b60a 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -14,7 +14,11 @@ #define ADAPTER_ROM 8 #define ACPI_TABLE 9 #define SMBIOS_TABLE 10 -#define MAX_MEMORY_TYPE 11 +#define UMA_VIDEO_RAM 11 +#define VUMA_VIDEO_RAM 12 +#define MAX_MEMORY_TYPE 13 + +#define MEM_SIZE_IS_IN_BYTES (1 << 31) #define LOONGSON3_BOOT_MEM_MAP_MAX 128 struct efi_memory_map_loongson { @@ -117,7 +121,8 @@ struct irq_source_routing_table { u64 pci_io_start_addr; u64 pci_io_end_addr; u64 pci_config_addr; - u32 dma_mask_bits; + u16 dma_mask_bits; + u16 dma_noncoherent; } __packed; struct interface_info { diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 5719ff49eff1..0c59e168f800 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -35,7 +35,7 @@ struct plat_smp_ops { void (*cpu_die)(unsigned int cpu); void (*cleanup_dead_cpu)(unsigned cpu); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void (*kexec_nonboot_cpu)(void); #endif }; diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index a40d8c0e4b87..901bc61fa7ae 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -93,7 +93,7 @@ static inline void __cpu_die(unsigned int cpu) extern void __noreturn play_dead(void); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static inline void kexec_nonboot_cpu(void) { extern const struct plat_smp_ops *mp_ops; /* private */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 853a43ee4b44..ecf3278a32f7 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -90,7 +90,7 @@ obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK_8250) += early_printk_8250.o diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 5387ed0a5186..b630604c577f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -121,6 +121,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK; + + /* + * New tasks lose permission to use the fpu. This accelerates context + * switching for most programs since they don't use the fpu. + */ + clear_tsk_thread_flag(p, TIF_USEDFPU); + clear_tsk_thread_flag(p, TIF_USEDMSA); + clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); + +#ifdef CONFIG_MIPS_MT_FPAFF + clear_tsk_thread_flag(p, TIF_FPUBOUND); +#endif /* CONFIG_MIPS_MT_FPAFF */ + if (unlikely(args->fn)) { /* kernel thread */ unsigned long status = p->thread.cp0_status; @@ -149,20 +162,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; - /* - * New tasks lose permission to use the fpu. This accelerates context - * switching for most programs since they don't use the fpu. - */ childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); - clear_tsk_thread_flag(p, TIF_USEDFPU); - clear_tsk_thread_flag(p, TIF_USEDMSA); - clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); - -#ifdef CONFIG_MIPS_MT_FPAFF - clear_tsk_thread_flag(p, TIF_FPUBOUND); -#endif /* CONFIG_MIPS_MT_FPAFF */ - #ifdef CONFIG_MIPS_FP_SUPPORT atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE); #endif diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index c074ecce3fbf..b3dbf9ecb0d6 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -434,7 +434,7 @@ const struct plat_smp_ops bmips43xx_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -451,7 +451,7 @@ const struct plat_smp_ops bmips5000_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index dd55d59b88db..f6c37d407f36 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -392,7 +392,7 @@ static void cps_smp_finish(void) local_irq_enable(); } -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) enum cpu_death { CPU_DEATH_HALT, @@ -429,7 +429,7 @@ static void cps_shutdown_this_cpu(enum cpu_death death) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void cps_kexec_nonboot_cpu(void) { @@ -439,9 +439,9 @@ static void cps_kexec_nonboot_cpu(void) cps_shutdown_this_cpu(CPU_DEATH_POWER); } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ -#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */ +#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC_CORE */ #ifdef CONFIG_HOTPLUG_CPU @@ -610,7 +610,7 @@ static const struct plat_smp_ops cps_smp_ops = { .cpu_die = cps_cpu_die, .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, #endif }; diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8fbef537fb88..82e2e051b416 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -351,10 +351,11 @@ early_initcall(mips_smp_ipi_init); */ asmlinkage void start_secondary(void) { - unsigned int cpu; + unsigned int cpu = raw_smp_processor_id(); cpu_probe(); per_cpu_trap_init(false); + rcutree_report_cpu_starting(cpu); mips_clockevent_init(); mp_ops->init_secondary(); cpu_report(); @@ -366,7 +367,6 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); - cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; set_cpu_sibling_map(cpu); diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index a842b41c8e06..134ea054b1c7 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -395,3 +395,5 @@ 454 n32 futex_wake sys_futex_wake 455 n32 futex_wait sys_futex_wait 456 n32 futex_requeue sys_futex_requeue +457 n32 statmount sys_statmount +458 n32 listmount sys_listmount diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 116ff501bf92..959a21664703 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -371,3 +371,5 @@ 454 n64 futex_wake sys_futex_wake 455 n64 futex_wait sys_futex_wait 456 n64 futex_requeue sys_futex_requeue +457 n64 statmount sys_statmount +458 n64 listmount sys_listmount diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 525cc54bc63b..e55bc1d4bf0f 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -444,3 +444,5 @@ 454 o32 futex_wake sys_futex_wake 455 o32 futex_wait sys_futex_wait 456 o32 futex_requeue sys_futex_requeue +457 o32 statmount sys_statmount +458 o32 listmount sys_listmount diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index c961e2999f15..ef3750a6ffac 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c @@ -13,6 +13,8 @@ * Copyright (C) 2009 Lemote Inc. * Author: Wu Zhangjin, wuzhangjin@gmail.com */ + +#include <linux/dma-map-ops.h> #include <linux/export.h> #include <linux/pci_ids.h> #include <asm/bootinfo.h> @@ -147,8 +149,14 @@ void __init prom_lefi_init_env(void) loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; if (loongson_sysconf.dma_mask_bits < 32 || - loongson_sysconf.dma_mask_bits > 64) + loongson_sysconf.dma_mask_bits > 64) { loongson_sysconf.dma_mask_bits = 32; + dma_default_coherent = true; + } else { + dma_default_coherent = !eirq_source->dma_noncoherent; + } + + pr_info("Firmware: Coherent DMA: %s\n", dma_default_coherent ? "on" : "off"); loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index ee8de1735b7c..f25caa6aa9d3 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -49,8 +49,7 @@ void virtual_early_config(void) void __init szmem(unsigned int node) { u32 i, mem_type; - static unsigned long num_physpages; - u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size; + phys_addr_t node_id, mem_start, mem_size; /* Otherwise come from DTB */ if (loongson_sysconf.fw_interface != LOONGSON_LEFI) @@ -64,30 +63,46 @@ void __init szmem(unsigned int node) mem_type = loongson_memmap->map[i].mem_type; mem_size = loongson_memmap->map[i].mem_size; - mem_start = loongson_memmap->map[i].mem_start; + + /* Memory size comes in MB if MEM_SIZE_IS_IN_BYTES not set */ + if (mem_size & MEM_SIZE_IS_IN_BYTES) + mem_size &= ~MEM_SIZE_IS_IN_BYTES; + else + mem_size = mem_size << 20; + + mem_start = (node_id << 44) | loongson_memmap->map[i].mem_start; switch (mem_type) { case SYSTEM_RAM_LOW: case SYSTEM_RAM_HIGH: - start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; - node_psize = (mem_size << 20) >> PAGE_SHIFT; - end_pfn = start_pfn + node_psize; - num_physpages += node_psize; - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), - PFN_PHYS(node_psize), node, + case UMA_VIDEO_RAM: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes usable\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_add_node(mem_start, mem_size, node, MEMBLOCK_NONE); break; case SYSTEM_RAM_RESERVED: - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); + case VIDEO_ROM: + case ADAPTER_ROM: + case ACPI_TABLE: + case SMBIOS_TABLE: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes reserved\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_reserve(mem_start, mem_size); + break; + /* We should not reserve VUMA_VIDEO_RAM as it overlaps with MMIO */ + case VUMA_VIDEO_RAM: + default: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes unhandled\n", + (u32)node_id, mem_type, &mem_start, &mem_size); break; } } + + /* Reserve vgabios if it comes from firmware */ + if (loongson_sysconf.vgabios_addr) + memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), + SZ_256K); } #ifndef CONFIG_NUMA diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index e420800043b0..e01c8d4a805a 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -53,7 +53,7 @@ static void loongson_halt(void) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* 0X80000000~0X80200000 is safe */ #define MAX_ARGS 64 @@ -158,7 +158,7 @@ static int __init mips_reboot_setup(void) _machine_halt = loongson_halt; pm_power_off = loongson_poweroff; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); if (WARN_ON(!kexec_argv)) return -ENOMEM; diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index e015a26a40f7..498bdc1bb0ed 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -864,7 +864,7 @@ const struct plat_smp_ops loongson3_smp_ops = { .cpu_disable = loongson3_cpu_disable, .cpu_die = loongson3_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 1641ff9a8b83..833555f74ffa 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -71,7 +71,7 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %2\n" \ + "\t.align 4\n" \ "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ "\t.blockz %1-4-2\n" \ diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index a47798fed54e..9c84470c31c7 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -455,3 +455,5 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6f105ee4f3cf..76f05373361f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -189,6 +189,7 @@ config PPC select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY + select FUNCTION_ALIGNMENT_4B select GENERIC_ATOMIC64 if PPC32 select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE @@ -608,10 +609,10 @@ config ARCH_SUPPORTS_KEXEC def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP) config ARCH_SUPPORTS_KEXEC_FILE - def_bool PPC64 && CRYPTO=y && CRYPTO_SHA256=y + def_bool PPC64 config ARCH_SUPPORTS_KEXEC_PURGATORY - def_bool KEXEC_FILE + def_bool y config ARCH_SELECTS_KEXEC_FILE def_bool y diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index ea4033abc07d..8c80b154e814 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -271,7 +271,6 @@ config PPC_EARLY_DEBUG_USBGECKO config PPC_EARLY_DEBUG_PS3GELIC bool "Early debugging through the PS3 Ethernet port" depends on PPC_PS3 - select PS3GELIC_UDBG help Select this to enable early debugging for the PlayStation3 via UDP broadcasts sent out through the Ethernet port. diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index f19dbaa1d541..051247027da0 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -10,15 +10,26 @@ # Rewritten by Cort Dougan and Paul Mackerras # +ifdef cross_compiling + ifeq ($(CROSS_COMPILE),) + # Auto detect cross compiler prefix. + # Look for: (powerpc(64(le)?)?)(-unknown)?-linux(-gnu)?- + CC_ARCHES := powerpc powerpc64 powerpc64le + CC_SUFFIXES := linux linux-gnu unknown-linux-gnu + CROSS_COMPILE := $(call cc-cross-prefix, $(foreach a,$(CC_ARCHES), \ + $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) + endif +endif + HAS_BIARCH := $(call cc-option-yn, -m32) # Set default 32 bits cross compilers for vdso and boot wrapper CROSS32_COMPILE ?= # If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use -# ppc64_defconfig because we have nothing better to go on. +# ppc64le_defconfig because we have nothing better to go on. uname := $(shell uname -m) -KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig +KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64le)_defconfig new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) @@ -161,7 +172,7 @@ CFLAGS-y += $(CONFIG_TUNE_CPU) asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) -KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr) +KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr) KBUILD_AFLAGS += $(AFLAGS-y) KBUILD_CFLAGS += $(call cc-option,-msoft-float) KBUILD_CFLAGS += $(CFLAGS-y) @@ -232,7 +243,7 @@ BOOT_TARGETS2 := zImage% dtbImage% treeImage.% cuImage.% simpleImage.% uImage.% PHONY += $(BOOT_TARGETS1) $(BOOT_TARGETS2) -boot := arch/$(ARCH)/boot +boot := arch/powerpc/boot $(BOOT_TARGETS1): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) @@ -336,7 +347,7 @@ PHONY += $(generated_configs) define archhelp echo '* zImage - Build default images selected by kernel config' - echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' + echo ' zImage.* - Compressed kernel image (arch/powerpc/boot/zImage.*)' echo ' uImage - U-Boot native image format' echo ' cuImage.<dt> - Backwards compatible U-Boot image for older' echo ' versions which do not support device trees' @@ -347,12 +358,12 @@ define archhelp echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH) and run lilo' - echo ' *_defconfig - Select default config from arch/$(ARCH)/configs' + echo ' *_defconfig - Select default config from arch/powerpc/configs' echo '' echo ' Targets with <dt> embed a device tree blob inside the image' echo ' These targets support board with firmware that does not' echo ' support passing a device tree directly. Replace <dt> with the' - echo ' name of a dts file from the arch/$(ARCH)/boot/dts/ directory' + echo ' name of a dts file from the arch/powerpc/boot/dts/ directory' echo ' (minus the .dts extension).' echo $(foreach cfg,$(generated_configs), diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index d552044c5afc..aa5152ca8120 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -367,45 +367,46 @@ reg = <0xf0000 0x1000>; interrupts = <18 2 0 0>; fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x30061>; - fsl,tmu-calibration = <0x00000000 0x0000000f - 0x00000001 0x00000017 - 0x00000002 0x0000001e - 0x00000003 0x00000026 - 0x00000004 0x0000002e - 0x00000005 0x00000035 - 0x00000006 0x0000003d - 0x00000007 0x00000044 - 0x00000008 0x0000004c - 0x00000009 0x00000053 - 0x0000000a 0x0000005b - 0x0000000b 0x00000064 - - 0x00010000 0x00000011 - 0x00010001 0x0000001c - 0x00010002 0x00000024 - 0x00010003 0x0000002b - 0x00010004 0x00000034 - 0x00010005 0x00000039 - 0x00010006 0x00000042 - 0x00010007 0x0000004c - 0x00010008 0x00000051 - 0x00010009 0x0000005a - 0x0001000a 0x00000063 - - 0x00020000 0x00000013 - 0x00020001 0x00000019 - 0x00020002 0x00000024 - 0x00020003 0x0000002c - 0x00020004 0x00000035 - 0x00020005 0x0000003d - 0x00020006 0x00000046 - 0x00020007 0x00000050 - 0x00020008 0x00000059 - - 0x00030000 0x00000002 - 0x00030001 0x0000000d - 0x00030002 0x00000019 - 0x00030003 0x00000024>; + fsl,tmu-calibration = + <0x00000000 0x0000000f>, + <0x00000001 0x00000017>, + <0x00000002 0x0000001e>, + <0x00000003 0x00000026>, + <0x00000004 0x0000002e>, + <0x00000005 0x00000035>, + <0x00000006 0x0000003d>, + <0x00000007 0x00000044>, + <0x00000008 0x0000004c>, + <0x00000009 0x00000053>, + <0x0000000a 0x0000005b>, + <0x0000000b 0x00000064>, + + <0x00010000 0x00000011>, + <0x00010001 0x0000001c>, + <0x00010002 0x00000024>, + <0x00010003 0x0000002b>, + <0x00010004 0x00000034>, + <0x00010005 0x00000039>, + <0x00010006 0x00000042>, + <0x00010007 0x0000004c>, + <0x00010008 0x00000051>, + <0x00010009 0x0000005a>, + <0x0001000a 0x00000063>, + + <0x00020000 0x00000013>, + <0x00020001 0x00000019>, + <0x00020002 0x00000024>, + <0x00020003 0x0000002c>, + <0x00020004 0x00000035>, + <0x00020005 0x0000003d>, + <0x00020006 0x00000046>, + <0x00020007 0x00000050>, + <0x00020008 0x00000059>, + + <0x00030000 0x00000002>, + <0x00030001 0x0000000d>, + <0x00030002 0x00000019>, + <0x00030003 0x00000024>; #thermal-sensor-cells = <1>; }; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index ad0ab33336b8..776788623204 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -447,41 +447,42 @@ reg = <0xf0000 0x1000>; interrupts = <18 2 0 0>; fsl,tmu-range = <0xa0000 0x90026 0x8004a 0x1006a>; - fsl,tmu-calibration = <0x00000000 0x00000025 - 0x00000001 0x00000028 - 0x00000002 0x0000002d - 0x00000003 0x00000031 - 0x00000004 0x00000036 - 0x00000005 0x0000003a - 0x00000006 0x00000040 - 0x00000007 0x00000044 - 0x00000008 0x0000004a - 0x00000009 0x0000004f - 0x0000000a 0x00000054 - - 0x00010000 0x0000000d - 0x00010001 0x00000013 - 0x00010002 0x00000019 - 0x00010003 0x0000001f - 0x00010004 0x00000025 - 0x00010005 0x0000002d - 0x00010006 0x00000033 - 0x00010007 0x00000043 - 0x00010008 0x0000004b - 0x00010009 0x00000053 - - 0x00020000 0x00000010 - 0x00020001 0x00000017 - 0x00020002 0x0000001f - 0x00020003 0x00000029 - 0x00020004 0x00000031 - 0x00020005 0x0000003c - 0x00020006 0x00000042 - 0x00020007 0x0000004d - 0x00020008 0x00000056 - - 0x00030000 0x00000012 - 0x00030001 0x0000001d>; + fsl,tmu-calibration = + <0x00000000 0x00000025>, + <0x00000001 0x00000028>, + <0x00000002 0x0000002d>, + <0x00000003 0x00000031>, + <0x00000004 0x00000036>, + <0x00000005 0x0000003a>, + <0x00000006 0x00000040>, + <0x00000007 0x00000044>, + <0x00000008 0x0000004a>, + <0x00000009 0x0000004f>, + <0x0000000a 0x00000054>, + + <0x00010000 0x0000000d>, + <0x00010001 0x00000013>, + <0x00010002 0x00000019>, + <0x00010003 0x0000001f>, + <0x00010004 0x00000025>, + <0x00010005 0x0000002d>, + <0x00010006 0x00000033>, + <0x00010007 0x00000043>, + <0x00010008 0x0000004b>, + <0x00010009 0x00000053>, + + <0x00020000 0x00000010>, + <0x00020001 0x00000017>, + <0x00020002 0x0000001f>, + <0x00020003 0x00000029>, + <0x00020004 0x00000031>, + <0x00020005 0x0000003c>, + <0x00020006 0x00000042>, + <0x00020007 0x0000004d>, + <0x00020008 0x00000056>, + + <0x00030000 0x00000012>, + <0x00030001 0x0000001d>; #thermal-sensor-cells = <1>; }; diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 6e7b9e8fd225..544a65fda77b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -92,6 +92,7 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZONE_DEVICE=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2b175ddf82f0..aa8bb0208bcc 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -24,6 +24,7 @@ CONFIG_PS3_VRAM=m CONFIG_PS3_LPM=m # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set CONFIG_KEXEC=y +# CONFIG_PPC64_BIG_ENDIAN_ELF_ABI_V2 is not set CONFIG_PPC_4K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_PM=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 8d3eacb50d56..9d44e6630908 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_DEBUG_CREDENTIALS=y # CONFIG_FTRACE is not set CONFIG_XMON=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index cb77eddca54b..927d585652bc 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -17,12 +17,6 @@ #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ #define _PAGE_READ 0x00004 /* read access allowed */ -#define _PAGE_NA _PAGE_PRIVILEGED -#define _PAGE_NAX _PAGE_EXEC -#define _PAGE_RO _PAGE_READ -#define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC) -#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) -#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ #define _PAGE_SAO 0x00010 /* Strong access order */ #define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ @@ -532,8 +526,8 @@ static inline bool pte_user(pte_t pte) static inline bool pte_access_permitted(pte_t pte, bool write) { /* - * _PAGE_READ is needed for any access and will be - * cleared for PROT_NONE + * _PAGE_READ is needed for any access and will be cleared for + * PROT_NONE. Execute-only mapping via PROT_EXEC also returns false. */ if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) return false; diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 1950c1b825b4..fd642b729775 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -158,11 +158,6 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, */ } -static inline bool __pte_protnone(unsigned long pte) -{ - return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE); -} - static inline bool __pte_flags_need_flush(unsigned long oldval, unsigned long newval) { @@ -179,8 +174,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval, /* * We do not expect kernel mappings or non-PTEs or not-present PTEs. */ - VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED); - VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED); VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE)); VM_WARN_ON_ONCE(!(newval & _PAGE_PTE)); VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT)); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 9e5a39b6a311..1ebd2ca97f12 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -25,7 +25,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) addr += MCOUNT_INSN_SIZE; - return addr; + return addr; } unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index ddb99e982917..a41e542ba94d 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -349,7 +349,16 @@ #define H_GET_ENERGY_SCALE_INFO 0x450 #define H_PKS_SIGNED_UPDATE 0x454 #define H_WATCHDOG 0x45C -#define MAX_HCALL_OPCODE H_WATCHDOG +#define H_GUEST_GET_CAPABILITIES 0x460 +#define H_GUEST_SET_CAPABILITIES 0x464 +#define H_GUEST_CREATE 0x470 +#define H_GUEST_CREATE_VCPU 0x474 +#define H_GUEST_GET_STATE 0x478 +#define H_GUEST_SET_STATE 0x47C +#define H_GUEST_RUN_VCPU 0x480 +#define H_GUEST_COPY_MEMORY 0x484 +#define H_GUEST_DELETE 0x488 +#define MAX_HCALL_OPCODE H_GUEST_DELETE /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -393,15 +402,6 @@ #define H_ENTER_NESTED 0xF804 #define H_TLB_INVALIDATE 0xF808 #define H_COPY_TOFROM_GUEST 0xF80C -#define H_GUEST_GET_CAPABILITIES 0x460 -#define H_GUEST_SET_CAPABILITIES 0x464 -#define H_GUEST_CREATE 0x470 -#define H_GUEST_CREATE_VCPU 0x474 -#define H_GUEST_GET_STATE 0x478 -#define H_GUEST_SET_STATE 0x47C -#define H_GUEST_RUN_VCPU 0x480 -#define H_GUEST_COPY_MEMORY 0x484 -#define H_GUEST_DELETE 0x488 /* Flags for H_SVM_PAGE_IN */ #define H_PAGE_IN_SHARED 0x1 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 4f527d09c92b..3e1e2a698c9e 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -302,6 +302,7 @@ void kvmhv_nested_exit(void); void kvmhv_vm_nested_init(struct kvm *kvm); long kvmhv_set_partition_table(struct kvm_vcpu *vcpu); long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu); +void kvmhv_flush_lpid(u64 lpid); void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); @@ -593,13 +594,17 @@ static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ KVMPPC_BOOK3S_VCORE_ACCESSOR(vtb, 64, KVMPPC_GSID_VTB) -KVMPPC_BOOK3S_VCORE_ACCESSOR(tb_offset, 64, KVMPPC_GSID_TB_OFFSET) KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(arch_compat, 32, KVMPPC_GSID_LOGICAL_PVR) KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(lpcr, 64, KVMPPC_GSID_LPCR) +KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(tb_offset, 64, KVMPPC_GSID_TB_OFFSET) + +static inline u64 kvmppc_get_tb_offset(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vcore->tb_offset; +} static inline u64 kvmppc_get_dec_expires(struct kvm_vcpu *vcpu) { - WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_TB_OFFSET) < 0); WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB) < 0); return vcpu->arch.dec_expires; } @@ -607,7 +612,6 @@ static inline u64 kvmppc_get_dec_expires(struct kvm_vcpu *vcpu) static inline void kvmppc_set_dec_expires(struct kvm_vcpu *vcpu, u64 val) { vcpu->arch.dec_expires = val; - WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_TB_OFFSET) < 0); kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB); } diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2477021bff54..d8729ec81ca0 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -682,6 +682,7 @@ void kvmhv_nestedv2_vcpu_free(struct kvm_vcpu *vcpu, struct kvmhv_nestedv2_io *i int kvmhv_nestedv2_flush_vcpu(struct kvm_vcpu *vcpu, u64 time_limit); int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1); int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu); +int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index b88d1d2cf304..b71b9582e754 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -4,9 +4,6 @@ #include <asm/types.h> -#define __ALIGN .align 2 -#define __ALIGN_STR ".align 2" - #ifdef CONFIG_PPC64_ELF_ABI_V1 #define cond_syscall(x) \ asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 52cc25864a1b..d8b7e246a32f 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -412,5 +412,9 @@ extern void *abatron_pteptrs[2]; #include <asm/nohash/mmu.h> #endif +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) +#define __HAVE_ARCH_RESERVED_KERNEL_PAGES +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MMU_H_ */ diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 4c6c6dbd182f..da827d2d0866 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -42,14 +42,6 @@ u64 memory_hotplug_max(void); #else #define memory_hotplug_max() memblock_end_of_DRAM() #endif /* CONFIG_NUMA */ -#ifdef CONFIG_FA_DUMP -#define __HAVE_ARCH_RESERVED_KERNEL_PAGES -#endif - -#ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, - int nid, pgprot_t prot); -#endif #endif /* __KERNEL__ */ #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h index f5fdbd8ae9db..0dbbff59101d 100644 --- a/arch/powerpc/include/asm/papr-sysparm.h +++ b/arch/powerpc/include/asm/papr-sysparm.h @@ -2,8 +2,10 @@ #ifndef _ASM_POWERPC_PAPR_SYSPARM_H #define _ASM_POWERPC_PAPR_SYSPARM_H +#include <uapi/asm/papr-sysparm.h> + typedef struct { - const u32 token; + u32 token; } papr_sysparm_t; #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, }) @@ -20,11 +22,14 @@ typedef struct { #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS mk_papr_sysparm(50) #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55) -enum { - PAPR_SYSPARM_MAX_INPUT = 1024, - PAPR_SYSPARM_MAX_OUTPUT = 4000, -}; - +/** + * struct papr_sysparm_buf - RTAS work area layout for system parameter functions. + * + * This is the memory layout of the buffers passed to/from + * ibm,get-system-parameter and ibm,set-system-parameter. It is + * distinct from the papr_sysparm_io_block structure that is passed + * between user space and the kernel. + */ struct papr_sysparm_buf { __be16 len; char val[PAPR_SYSPARM_MAX_OUTPUT]; diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index ac4279208d63..b78b82d66057 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -76,6 +76,17 @@ static inline bool is_vcpu_idle(int vcpu) { return lppaca_of(vcpu).idle; } + +static inline bool vcpu_is_dispatched(int vcpu) +{ + /* + * This is the yield_count. An "odd" value (low bit on) means that + * the processor is yielded (either because of an OS yield or a + * hypervisor preempt). An even value implies that the processor is + * currently executing. + */ + return (!(yield_count_of(vcpu) & 1)); +} #else static inline bool is_shared_processor(void) { @@ -109,6 +120,10 @@ static inline bool is_vcpu_idle(int vcpu) { return false; } +static inline bool vcpu_is_dispatched(int vcpu) +{ + return true; +} #endif #define vcpu_is_preempted vcpu_is_preempted @@ -134,12 +149,12 @@ static inline bool vcpu_is_preempted(int cpu) * If the hypervisor has dispatched the target CPU on a physical * processor, then the target CPU is definitely not preempted. */ - if (!(yield_count_of(cpu) & 1)) + if (vcpu_is_dispatched(cpu)) return false; /* - * If the target CPU has yielded to Hypervisor but OS has not - * requested idle then the target CPU is definitely preempted. + * if the target CPU is not dispatched and the guest OS + * has not marked the CPU idle, then it is hypervisor preempted. */ if (!is_vcpu_idle(cpu)) return true; @@ -166,7 +181,7 @@ static inline bool vcpu_is_preempted(int cpu) /* * The PowerVM hypervisor dispatches VMs on a whole core - * basis. So we know that a thread sibling of the local CPU + * basis. So we know that a thread sibling of the executing CPU * cannot have been preempted by the hypervisor, even if it * has called H_CONFER, which will set the yield bit. */ @@ -174,15 +189,17 @@ static inline bool vcpu_is_preempted(int cpu) return false; /* - * If any of the threads of the target CPU's core are not - * preempted or ceded, then consider target CPU to be - * non-preempted. + * The specific target CPU was marked by guest OS as idle, but + * then also check all other cpus in the core for PowerVM + * because it does core scheduling and one of the vcpu + * of the core getting preempted by hypervisor implies + * other vcpus can also be considered preempted. */ first_cpu = cpu_first_thread_sibling(cpu); for (i = first_cpu; i < first_cpu + threads_per_core; i++) { if (i == cpu) continue; - if (!(yield_count_of(i) & 1)) + if (vcpu_is_dispatched(i)) return false; if (!is_vcpu_idle(i)) return true; diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index d9fcff575027..ce2b1b5eebdd 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -35,6 +35,9 @@ extern void init_pci_config_tokens (void); extern unsigned long get_phb_buid (struct device_node *); extern int rtas_setup_phb(struct pci_controller *phb); +int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val); +int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val); + #ifdef CONFIG_EEH void eeh_addr_cache_insert_dev(struct pci_dev *dev); @@ -44,8 +47,6 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed); void eeh_save_bars(struct eeh_dev *edev); -int rtas_write_config(struct pci_dn *, int where, int size, u32 val); -int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_mark_isolated(struct eeh_pe *pe); void eeh_pe_state_clear(struct eeh_pe *pe, int state, bool include_passed); diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index a5f36546a052..d13d8fdc3411 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -514,4 +514,10 @@ u64 ps3_get_spe_id(void *arg); void ps3_early_mm_init(void); +#ifdef CONFIG_PPC_EARLY_DEBUG_PS3GELIC +void udbg_shutdown_ps3gelic(void); +#else +static inline void udbg_shutdown_ps3gelic(void) {} +#endif + #endif diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 4ae4ab9090a2..7fd09f25452d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1361,6 +1361,7 @@ #define PVR_POWER8E 0x004B #define PVR_POWER8NVL 0x004C #define PVR_POWER8 0x004D +#define PVR_HX_C2000 0x0066 #define PVR_POWER9 0x004E #define PVR_POWER10 0x0080 #define PVR_BE 0x0070 diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h deleted file mode 100644 index 74fba29e9491..000000000000 --- a/arch/powerpc/include/asm/reg_a2.h +++ /dev/null @@ -1,154 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Register definitions specific to the A2 core - * - * Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. - */ - -#ifndef __ASM_POWERPC_REG_A2_H__ -#define __ASM_POWERPC_REG_A2_H__ - -#include <asm/asm-const.h> - -#define SPRN_TENSR 0x1b5 -#define SPRN_TENS 0x1b6 /* Thread ENable Set */ -#define SPRN_TENC 0x1b7 /* Thread ENable Clear */ - -#define SPRN_A2_CCR0 0x3f0 /* Core Configuration Register 0 */ -#define SPRN_A2_CCR1 0x3f1 /* Core Configuration Register 1 */ -#define SPRN_A2_CCR2 0x3f2 /* Core Configuration Register 2 */ -#define SPRN_MMUCR0 0x3fc /* MMU Control Register 0 */ -#define SPRN_MMUCR1 0x3fd /* MMU Control Register 1 */ -#define SPRN_MMUCR2 0x3fe /* MMU Control Register 2 */ -#define SPRN_MMUCR3 0x3ff /* MMU Control Register 3 */ - -#define SPRN_IAR 0x372 - -#define SPRN_IUCR0 0x3f3 -#define IUCR0_ICBI_ACK 0x1000 - -#define SPRN_XUCR0 0x3f6 /* Execution Unit Config Register 0 */ - -#define A2_IERAT_SIZE 16 -#define A2_DERAT_SIZE 32 - -/* A2 MMUCR0 bits */ -#define MMUCR0_ECL 0x80000000 /* Extended Class for TLB fills */ -#define MMUCR0_TID_NZ 0x40000000 /* TID is non-zero */ -#define MMUCR0_TS 0x10000000 /* Translation space for TLB fills */ -#define MMUCR0_TGS 0x20000000 /* Guest space for TLB fills */ -#define MMUCR0_TLBSEL 0x0c000000 /* TLB or ERAT target for TLB fills */ -#define MMUCR0_TLBSEL_U 0x00000000 /* TLBSEL = UTLB */ -#define MMUCR0_TLBSEL_I 0x08000000 /* TLBSEL = I-ERAT */ -#define MMUCR0_TLBSEL_D 0x0c000000 /* TLBSEL = D-ERAT */ -#define MMUCR0_LOCKSRSH 0x02000000 /* Use TLB lock on tlbsx. */ -#define MMUCR0_TID_MASK 0x000000ff /* TID field */ - -/* A2 MMUCR1 bits */ -#define MMUCR1_IRRE 0x80000000 /* I-ERAT round robin enable */ -#define MMUCR1_DRRE 0x40000000 /* D-ERAT round robin enable */ -#define MMUCR1_REE 0x20000000 /* Reference Exception Enable*/ -#define MMUCR1_CEE 0x10000000 /* Change exception enable */ -#define MMUCR1_CSINV_ALL 0x00000000 /* Inval ERAT on all CS evts */ -#define MMUCR1_CSINV_NISYNC 0x04000000 /* Inval ERAT on all ex isync*/ -#define MMUCR1_CSINV_NEVER 0x0c000000 /* Don't inval ERAT on CS */ -#define MMUCR1_ICTID 0x00080000 /* IERAT class field as TID */ -#define MMUCR1_ITTID 0x00040000 /* IERAT thdid field as TID */ -#define MMUCR1_DCTID 0x00020000 /* DERAT class field as TID */ -#define MMUCR1_DTTID 0x00010000 /* DERAT thdid field as TID */ -#define MMUCR1_DCCD 0x00008000 /* DERAT class ignore */ -#define MMUCR1_TLBWE_BINV 0x00004000 /* back invalidate on tlbwe */ - -/* A2 MMUCR2 bits */ -#define MMUCR2_PSSEL_SHIFT 4 - -/* A2 MMUCR3 bits */ -#define MMUCR3_THID 0x0000000f /* Thread ID */ - -/* *** ERAT TLB bits definitions */ -#define TLB0_EPN_MASK ASM_CONST(0xfffffffffffff000) -#define TLB0_CLASS_MASK ASM_CONST(0x0000000000000c00) -#define TLB0_CLASS_00 ASM_CONST(0x0000000000000000) -#define TLB0_CLASS_01 ASM_CONST(0x0000000000000400) -#define TLB0_CLASS_10 ASM_CONST(0x0000000000000800) -#define TLB0_CLASS_11 ASM_CONST(0x0000000000000c00) -#define TLB0_V ASM_CONST(0x0000000000000200) -#define TLB0_X ASM_CONST(0x0000000000000100) -#define TLB0_SIZE_MASK ASM_CONST(0x00000000000000f0) -#define TLB0_SIZE_4K ASM_CONST(0x0000000000000010) -#define TLB0_SIZE_64K ASM_CONST(0x0000000000000030) -#define TLB0_SIZE_1M ASM_CONST(0x0000000000000050) -#define TLB0_SIZE_16M ASM_CONST(0x0000000000000070) -#define TLB0_SIZE_1G ASM_CONST(0x00000000000000a0) -#define TLB0_THDID_MASK ASM_CONST(0x000000000000000f) -#define TLB0_THDID_0 ASM_CONST(0x0000000000000001) -#define TLB0_THDID_1 ASM_CONST(0x0000000000000002) -#define TLB0_THDID_2 ASM_CONST(0x0000000000000004) -#define TLB0_THDID_3 ASM_CONST(0x0000000000000008) -#define TLB0_THDID_ALL ASM_CONST(0x000000000000000f) - -#define TLB1_RESVATTR ASM_CONST(0x00f0000000000000) -#define TLB1_U0 ASM_CONST(0x0008000000000000) -#define TLB1_U1 ASM_CONST(0x0004000000000000) -#define TLB1_U2 ASM_CONST(0x0002000000000000) -#define TLB1_U3 ASM_CONST(0x0001000000000000) -#define TLB1_R ASM_CONST(0x0000800000000000) -#define TLB1_C ASM_CONST(0x0000400000000000) -#define TLB1_RPN_MASK ASM_CONST(0x000003fffffff000) -#define TLB1_W ASM_CONST(0x0000000000000800) -#define TLB1_I ASM_CONST(0x0000000000000400) -#define TLB1_M ASM_CONST(0x0000000000000200) -#define TLB1_G ASM_CONST(0x0000000000000100) -#define TLB1_E ASM_CONST(0x0000000000000080) -#define TLB1_VF ASM_CONST(0x0000000000000040) -#define TLB1_UX ASM_CONST(0x0000000000000020) -#define TLB1_SX ASM_CONST(0x0000000000000010) -#define TLB1_UW ASM_CONST(0x0000000000000008) -#define TLB1_SW ASM_CONST(0x0000000000000004) -#define TLB1_UR ASM_CONST(0x0000000000000002) -#define TLB1_SR ASM_CONST(0x0000000000000001) - -/* A2 erativax attributes definitions */ -#define ERATIVAX_RS_IS_ALL 0x000 -#define ERATIVAX_RS_IS_TID 0x040 -#define ERATIVAX_RS_IS_CLASS 0x080 -#define ERATIVAX_RS_IS_FULLMATCH 0x0c0 -#define ERATIVAX_CLASS_00 0x000 -#define ERATIVAX_CLASS_01 0x010 -#define ERATIVAX_CLASS_10 0x020 -#define ERATIVAX_CLASS_11 0x030 -#define ERATIVAX_PSIZE_4K (TLB_PSIZE_4K >> 1) -#define ERATIVAX_PSIZE_64K (TLB_PSIZE_64K >> 1) -#define ERATIVAX_PSIZE_1M (TLB_PSIZE_1M >> 1) -#define ERATIVAX_PSIZE_16M (TLB_PSIZE_16M >> 1) -#define ERATIVAX_PSIZE_1G (TLB_PSIZE_1G >> 1) - -/* A2 eratilx attributes definitions */ -#define ERATILX_T_ALL 0 -#define ERATILX_T_TID 1 -#define ERATILX_T_TGS 2 -#define ERATILX_T_FULLMATCH 3 -#define ERATILX_T_CLASS0 4 -#define ERATILX_T_CLASS1 5 -#define ERATILX_T_CLASS2 6 -#define ERATILX_T_CLASS3 7 - -/* XUCR0 bits */ -#define XUCR0_TRACE_UM_T0 0x40000000 /* Thread 0 */ -#define XUCR0_TRACE_UM_T1 0x20000000 /* Thread 1 */ -#define XUCR0_TRACE_UM_T2 0x10000000 /* Thread 2 */ -#define XUCR0_TRACE_UM_T3 0x08000000 /* Thread 3 */ - -/* A2 CCR0 register */ -#define A2_CCR0_PME_DISABLED 0x00000000 -#define A2_CCR0_PME_SLEEP 0x40000000 -#define A2_CCR0_PME_RVW 0x80000000 -#define A2_CCR0_PME_DISABLED2 0xc0000000 - -/* A2 CCR2 register */ -#define A2_CCR2_ERAT_ONLY_MODE 0x00000001 -#define A2_CCR2_ENABLE_ICSWX 0x00000002 -#define A2_CCR2_ENABLE_PC 0x20000000 -#define A2_CCR2_ENABLE_TRACE 0x40000000 - -#endif /* __ASM_POWERPC_REG_A2_H__ */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index c697c3c74694..9bb2210c8d44 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -3,6 +3,7 @@ #define _POWERPC_RTAS_H #ifdef __KERNEL__ +#include <linux/mutex.h> #include <linux/spinlock.h> #include <asm/page.h> #include <asm/rtas-types.h> @@ -201,12 +202,25 @@ typedef struct { /* Memory set aside for sys_rtas to use with calls that need a work area. */ #define RTAS_USER_REGION_SIZE (64 * 1024) -/* RTAS return status codes */ -#define RTAS_HARDWARE_ERROR -1 /* Hardware Error */ -#define RTAS_BUSY -2 /* RTAS Busy */ -#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */ -#define RTAS_EXTENDED_DELAY_MIN 9900 -#define RTAS_EXTENDED_DELAY_MAX 9905 +/* + * Common RTAS function return values, derived from the table "RTAS + * Status Word Values" in PAPR+ v2.13 7.2.8: "Return Codes". If a + * function can return a value in this table then generally it has the + * meaning listed here. More extended commentary in the documentation + * for rtas_call(). + * + * RTAS functions may use negative and positive numbers not in this + * set for function-specific error and success conditions, + * respectively. + */ +#define RTAS_SUCCESS 0 /* Success. */ +#define RTAS_HARDWARE_ERROR -1 /* Hardware or other unspecified error. */ +#define RTAS_BUSY -2 /* Retry immediately. */ +#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */ +#define RTAS_UNEXPECTED_STATE_CHANGE -7 /* Seems limited to EEH and slot reset. */ +#define RTAS_EXTENDED_DELAY_MIN 9900 /* Retry after delaying for ~1ms. */ +#define RTAS_EXTENDED_DELAY_MAX 9905 /* Retry after delaying for ~100s. */ +#define RTAS_ML_ISOLATION_ERROR -9000 /* Multi-level isolation error. */ /* statuses specific to ibm,suspend-me */ #define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */ @@ -268,7 +282,7 @@ typedef struct { #define RTAS_TYPE_DEALLOC 0xE3 #define RTAS_TYPE_DUMP 0xE4 #define RTAS_TYPE_HOTPLUG 0xE5 -/* I don't add PowerMGM events right now, this is a different topic */ +/* I don't add PowerMGM events right now, this is a different topic */ #define RTAS_TYPE_PMGM_POWER_SW_ON 0x60 #define RTAS_TYPE_PMGM_POWER_SW_OFF 0x61 #define RTAS_TYPE_PMGM_LID_OPEN 0x62 @@ -408,44 +422,41 @@ static inline bool rtas_function_implemented(const rtas_fn_handle_t handle) { return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE; } -extern int rtas_token(const char *service); -extern int rtas_service_present(const char *service); -extern int rtas_call(int token, int, int, int *, ...); +int rtas_token(const char *service); +int rtas_call(int token, int nargs, int nret, int *outputs, ...); void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...); -extern void __noreturn rtas_restart(char *cmd); -extern void rtas_power_off(void); -extern void __noreturn rtas_halt(void); -extern void rtas_os_term(char *str); +void __noreturn rtas_restart(char *cmd); +void rtas_power_off(void); +void __noreturn rtas_halt(void); +void rtas_os_term(char *str); void rtas_activate_firmware(void); -extern int rtas_get_sensor(int sensor, int index, int *state); -extern int rtas_get_sensor_fast(int sensor, int index, int *state); -extern int rtas_get_power_level(int powerdomain, int *level); -extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); -extern bool rtas_indicator_present(int token, int *maxindex); -extern int rtas_set_indicator(int indicator, int index, int new_value); -extern int rtas_set_indicator_fast(int indicator, int index, int new_value); -extern void rtas_progress(char *s, unsigned short hex); +int rtas_get_sensor(int sensor, int index, int *state); +int rtas_get_sensor_fast(int sensor, int index, int *state); +int rtas_get_power_level(int powerdomain, int *level); +int rtas_set_power_level(int powerdomain, int level, int *setlevel); +bool rtas_indicator_present(int token, int *maxindex); +int rtas_set_indicator(int indicator, int index, int new_value); +int rtas_set_indicator_fast(int indicator, int index, int new_value); +void rtas_progress(char *s, unsigned short hex); int rtas_ibm_suspend_me(int *fw_status); int rtas_error_rc(int rtas_rc); struct rtc_time; -extern time64_t rtas_get_boot_time(void); -extern void rtas_get_rtc_time(struct rtc_time *rtc_time); -extern int rtas_set_rtc_time(struct rtc_time *rtc_time); +time64_t rtas_get_boot_time(void); +void rtas_get_rtc_time(struct rtc_time *rtc_time); +int rtas_set_rtc_time(struct rtc_time *rtc_time); -extern unsigned int rtas_busy_delay_time(int status); +unsigned int rtas_busy_delay_time(int status); bool rtas_busy_delay(int status); -extern int early_init_dt_scan_rtas(unsigned long node, - const char *uname, int depth, void *data); +int early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, void *data); -extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); +void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #ifdef CONFIG_PPC_PSERIES extern time64_t last_rtas_event; -extern int clobbering_unread_rtas_event(void); -extern void post_mobility_fixup(void); +int clobbering_unread_rtas_event(void); int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else static inline int clobbering_unread_rtas_event(void) { return 0; } @@ -456,14 +467,14 @@ static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) #endif #ifdef CONFIG_PPC_RTAS_DAEMON -extern void rtas_cancel_event_scan(void); +void rtas_cancel_event_scan(void); #else static inline void rtas_cancel_event_scan(void) { } #endif /* Error types logged. */ #define ERR_FLAG_ALREADY_LOGGED 0x0 -#define ERR_FLAG_BOOT 0x1 /* log was pulled from NVRAM on boot */ +#define ERR_FLAG_BOOT 0x1 /* log was pulled from NVRAM on boot */ #define ERR_TYPE_RTAS_LOG 0x2 /* from rtas event-scan */ #define ERR_TYPE_KERNEL_PANIC 0x4 /* from die()/panic() */ #define ERR_TYPE_KERNEL_PANIC_GZ 0x8 /* ditto, compressed */ @@ -473,7 +484,7 @@ static inline void rtas_cancel_event_scan(void) { } (ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC | ERR_TYPE_KERNEL_PANIC_GZ) #define RTAS_DEBUG KERN_DEBUG "RTAS: " - + #define RTAS_ERROR_LOG_MAX 2048 /* @@ -481,7 +492,7 @@ static inline void rtas_cancel_event_scan(void) { } * for all rtas calls that require an error buffer argument. * This includes 'check-exception' and 'rtas-last-error'. */ -extern int rtas_get_error_log_max(void); +int rtas_get_error_log_max(void); /* Event Scan Parameters */ #define EVENT_SCAN_ALL_EVENTS 0xf0000000 @@ -502,6 +513,8 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE]; /* RMO buffer reserved for user-space RTAS use */ extern unsigned long rtas_rmo_buf; +extern struct mutex rtas_ibm_get_vpd_lock; + #define GLOBAL_INTERRUPT_QUEUE 9005 /** @@ -520,8 +533,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg) (devfn << 8) | (reg & 0xff); } -extern void rtas_give_timebase(void); -extern void rtas_take_timebase(void); +void rtas_give_timebase(void); +void rtas_take_timebase(void); #ifdef CONFIG_PPC_RTAS static inline int page_is_rtas_user_buf(unsigned long pfn) @@ -534,7 +547,7 @@ static inline int page_is_rtas_user_buf(unsigned long pfn) /* Not the best place to put pSeries_coalesce_init, will be fixed when we * move some of the rtas suspend-me stuff to pseries */ -extern void pSeries_coalesce_init(void); +void pSeries_coalesce_init(void); void rtas_initialize(void); #else static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;} @@ -542,8 +555,6 @@ static inline void pSeries_coalesce_init(void) { } static inline void rtas_initialize(void) { } #endif -extern int call_rtas(const char *, int, int, unsigned long *, ...); - #ifdef CONFIG_HV_PERF_CTRS void read_24x7_sys_info(void); #else diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h b/arch/powerpc/include/uapi/asm/papr-miscdev.h new file mode 100644 index 000000000000..49a2a270b7f3 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_MISCDEV_H_ +#define _UAPI_PAPR_MISCDEV_H_ + +enum { + PAPR_MISCDEV_IOC_ID = 0xb2, +}; + +#endif /* _UAPI_PAPR_MISCDEV_H_ */ diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h new file mode 100644 index 000000000000..9f9a0f267ea5 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_SYSPARM_H_ +#define _UAPI_PAPR_SYSPARM_H_ + +#include <linux/types.h> +#include <asm/ioctl.h> +#include <asm/papr-miscdev.h> + +enum { + PAPR_SYSPARM_MAX_INPUT = 1024, + PAPR_SYSPARM_MAX_OUTPUT = 4000, +}; + +struct papr_sysparm_io_block { + __u32 parameter; + __u16 length; + char data[PAPR_SYSPARM_MAX_OUTPUT]; +}; + +/** + * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter. + * + * Uses _IOWR because of one corner case: Retrieving the value of the + * "OS Service Entitlement Status" parameter (60) requires the caller + * to supply input data (a date string) in the buffer passed to + * firmware. So the @length and @data of the incoming + * papr_sysparm_io_block are always used to initialize the work area + * supplied to ibm,get-system-parameter. No other parameters are known + * to parameterize the result this way, and callers are encouraged + * (but not required) to zero-initialize @length and @data in the + * common case. + * + * On error the contents of the ioblock are indeterminate. + * + * Return: + * 0: Success; @length is the length of valid data in @data, not to exceed @PAPR_SYSPARM_MAX_OUTPUT. + * -EIO: Platform error. (-1) + * -EINVAL: Incorrect data length or format. (-9999) + * -EPERM: The calling partition is not allowed to access this parameter. (-9002) + * -EOPNOTSUPP: Parameter not supported on this platform (-3) + */ +#define PAPR_SYSPARM_IOC_GET _IOWR(PAPR_MISCDEV_IOC_ID, 1, struct papr_sysparm_io_block) + +/** + * PAPR_SYSPARM_IOC_SET - Update the value of a PAPR system parameter. + * + * The contents of the ioblock are unchanged regardless of success. + * + * Return: + * 0: Success; the parameter has been updated. + * -EIO: Platform error. (-1) + * -EINVAL: Incorrect data length or format. (-9999) + * -EPERM: The calling partition is not allowed to access this parameter. (-9002) + * -EOPNOTSUPP: Parameter not supported on this platform (-3) + */ +#define PAPR_SYSPARM_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 2, struct papr_sysparm_io_block) + +#endif /* _UAPI_PAPR_SYSPARM_H_ */ diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h b/arch/powerpc/include/uapi/asm/papr-vpd.h new file mode 100644 index 000000000000..1c88e87cb420 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-vpd.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_VPD_H_ +#define _UAPI_PAPR_VPD_H_ + +#include <asm/ioctl.h> +#include <asm/papr-miscdev.h> + +struct papr_location_code { + /* + * PAPR+ v2.13 12.3.2.4 Converged Location Code Rules - Length + * Restrictions. 79 characters plus nul. + */ + char str[80]; +}; + +/* + * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to + * the location code. + */ +#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct papr_location_code) + +#endif /* _UAPI_PAPR_VPD_H_ */ diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h index c370c1b804a9..3ff9757df4c0 100644 --- a/arch/powerpc/kernel/cpu_specs_book3s_64.h +++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h @@ -238,6 +238,21 @@ static struct cpu_spec cpu_specs[] __initdata = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* 2.07-compliant processor, HeXin C2000 processor */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00660000, + .cpu_name = "HX-C2000", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* 3.00-compliant processor, i.e. Power9 "architected" mode */ .pvr_mask = 0xffffffff, .pvr_value = 0x0f000005, diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index e97a0fd0ae90..6f6801da9dc1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -20,9 +20,9 @@ #include <asm/setup.h> #include <asm/cpu_setup.h> -static struct cpu_spec the_cpu_spec __read_mostly; +static struct cpu_spec the_cpu_spec __ro_after_init; -struct cpu_spec* cur_cpu_spec __read_mostly = NULL; +struct cpu_spec *cur_cpu_spec __ro_after_init = NULL; EXPORT_SYMBOL(cur_cpu_spec); /* The platform string corresponding to the real PVR */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7ab4c8c0f1ab..dcf0591ad3c2 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -14,7 +14,6 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/thread_info.h> -#include <asm/reg_a2.h> #include <asm/exception-64e.h> #include <asm/bug.h> #include <asm/irqflags.h> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6a9acfb690c9..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,15 @@ #include <asm/feature-fixups.h> #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 392404688cec..9452a54d356c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void) usermsr = current->thread.regs->msr; + /* Caller has enabled FP/VEC/VSX/TM in MSR */ if (usermsr & MSR_FP) - save_fpu(current); - + __giveup_fpu(current); if (usermsr & MSR_VEC) - save_altivec(current); + __giveup_altivec(current); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (usermsr & MSR_TM) { diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index eddc031c4b95..7e793b503e29 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -18,6 +18,7 @@ #include <linux/kernel.h> #include <linux/lockdep.h> #include <linux/memblock.h> +#include <linux/mutex.h> #include <linux/of.h> #include <linux/of_fdt.h> #include <linux/reboot.h> @@ -70,14 +71,33 @@ struct rtas_filter { * ppc64le, and we want to keep it that way. It does * not make sense for this to be set when @filter * is NULL. + * @lock: Pointer to an optional dedicated per-function mutex. This + * should be set for functions that require multiple calls in + * sequence to complete a single operation, and such sequences + * will disrupt each other if allowed to interleave. Users of + * this function are required to hold the associated lock for + * the duration of the call sequence. Add an explanatory + * comment to the function table entry if setting this member. */ struct rtas_function { s32 token; const bool banned_for_syscall_on_le:1; const char * const name; const struct rtas_filter *filter; + struct mutex *lock; }; +/* + * Per-function locks for sequence-based RTAS functions. + */ +static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock); +static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock); +static DEFINE_MUTEX(rtas_ibm_get_indices_lock); +static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock); +static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock); +static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock); +DEFINE_MUTEX(rtas_ibm_get_vpd_lock); + static struct rtas_function rtas_function_table[] __ro_after_init = { [RTAS_FNIDX__CHECK_EXCEPTION] = { .name = "check-exception", @@ -125,6 +145,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = -1, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ as of v2.13 doesn't explicitly impose any + * restriction, but this typically requires multiple + * calls before success, and there's no reason to + * allow sequences to interleave. + */ + .lock = &rtas_ibm_activate_firmware_lock, }, [RTAS_FNIDX__IBM_CBE_START_PTCAL] = { .name = "ibm,cbe-start-ptcal", @@ -196,6 +223,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 1, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS + * must not call ibm,get-dynamic-sensor-state with + * different inputs until a non-retry status has been + * returned. + */ + .lock = &rtas_ibm_get_dynamic_sensor_state_lock, }, [RTAS_FNIDX__IBM_GET_INDICES] = { .name = "ibm,get-indices", @@ -203,6 +237,12 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 2, .size_idx1 = 3, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not + * interleave ibm,get-indices call sequences with + * different inputs. + */ + .lock = &rtas_ibm_get_indices_lock, }, [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = { .name = "ibm,get-rio-topology", @@ -220,6 +260,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 0, .size_idx1 = -1, .buf_idx2 = 1, .size_idx2 = 2, }, + /* + * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences + * should not be allowed to interleave. + */ + .lock = &rtas_ibm_get_vpd_lock, }, [RTAS_FNIDX__IBM_GET_XIVE] = { .name = "ibm,get-xive", @@ -239,6 +284,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 2, .size_idx1 = 3, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow + * only one call sequence in progress at a time. + */ + .lock = &rtas_ibm_lpar_perftools_lock, }, [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = { .name = "ibm,manage-flash-image", @@ -277,6 +327,14 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 0, .size_idx1 = 1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * This follows a sequence-based pattern similar to + * ibm,get-vpd et al. Since PAPR+ restricts + * interleaving call sequences for other functions of + * this style, assume the restriction applies here, + * even though it's not explicit in the spec. + */ + .lock = &rtas_ibm_physical_attestation_lock, }, [RTAS_FNIDX__IBM_PLATFORM_DUMP] = { .name = "ibm,platform-dump", @@ -284,6 +342,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 4, .size_idx1 = 5, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent + * sequences of ibm,platform-dump are allowed if they + * are operating on different dump tags. So leave the + * lock pointer unset for now. This may need + * reconsideration if kernel-internal users appear. + */ }, [RTAS_FNIDX__IBM_POWER_OFF_UPS] = { .name = "ibm,power-off-ups", @@ -326,6 +391,12 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 2, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call + * this function with different inputs until a + * non-retry status has been returned. + */ + .lock = &rtas_ibm_set_dynamic_indicator_lock, }, [RTAS_FNIDX__IBM_SET_EEH_OPTION] = { .name = "ibm,set-eeh-option", @@ -454,6 +525,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { }, }; +#define for_each_rtas_function(funcp) \ + for (funcp = &rtas_function_table[0]; \ + funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \ + ++funcp) + /* * Nearly all RTAS calls need to be serialized. All uses of the * default rtas_args block must hold rtas_lock. @@ -525,10 +601,10 @@ static DEFINE_XARRAY(rtas_token_to_function_xarray); static int __init rtas_token_to_function_xarray_init(void) { + const struct rtas_function *func; int err = 0; - for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { - const struct rtas_function *func = &rtas_function_table[i]; + for_each_rtas_function(func) { const s32 token = func->token; if (token == RTAS_UNKNOWN_SERVICE) @@ -544,6 +620,21 @@ static int __init rtas_token_to_function_xarray_init(void) } arch_initcall(rtas_token_to_function_xarray_init); +/* + * For use by sys_rtas(), where the token value is provided by user + * space and we don't want to warn on failed lookups. + */ +static const struct rtas_function *rtas_token_to_function_untrusted(s32 token) +{ + return xa_load(&rtas_token_to_function_xarray, token); +} + +/* + * Reverse lookup for deriving the function descriptor from a + * known-good token value in contexts where the former is not already + * available. @token must be valid, e.g. derived from the result of a + * prior lookup against the function table. + */ static const struct rtas_function *rtas_token_to_function(s32 token) { const struct rtas_function *func; @@ -551,12 +642,22 @@ static const struct rtas_function *rtas_token_to_function(s32 token) if (WARN_ONCE(token < 0, "invalid token %d", token)) return NULL; - func = xa_load(&rtas_token_to_function_xarray, token); - - if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token)) - return NULL; + func = rtas_token_to_function_untrusted(token); + if (func) + return func; + /* + * Fall back to linear scan in case the reverse mapping hasn't + * been initialized yet. + */ + if (xa_empty(&rtas_token_to_function_xarray)) { + for_each_rtas_function(func) { + if (func->token == token) + return func; + } + } - return func; + WARN_ONCE(true, "unexpected failed lookup for token %d", token); + return NULL; } /* This is here deliberately so it's only used in this file */ @@ -570,28 +671,25 @@ static void __do_enter_rtas(struct rtas_args *args) static void __do_enter_rtas_trace(struct rtas_args *args) { - const char *name = NULL; + const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token)); - if (args == &rtas_args) - lockdep_assert_held(&rtas_lock); /* - * If the tracepoints that consume the function name aren't - * active, avoid the lookup. + * If there is a per-function lock, it must be held by the + * caller. */ - if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) { - const s32 token = be32_to_cpu(args->token); - const struct rtas_function *func = rtas_token_to_function(token); + if (func->lock) + lockdep_assert_held(func->lock); - name = func->name; - } + if (args == &rtas_args) + lockdep_assert_held(&rtas_lock); - trace_rtas_input(args, name); + trace_rtas_input(args, func->name); trace_rtas_ll_entry(args); __do_enter_rtas(args); trace_rtas_ll_exit(args); - trace_rtas_output(args, name); + trace_rtas_output(args, func->name); } static void do_enter_rtas(struct rtas_args *args) @@ -670,7 +768,7 @@ static void call_rtas_display_status_delay(char c) static int pending_newline = 0; /* did last write end with unprinted newline? */ static int width = 16; - if (c == '\n') { + if (c == '\n') { while (width-- > 0) call_rtas_display_status(' '); width = 16; @@ -680,7 +778,7 @@ static void call_rtas_display_status_delay(char c) if (pending_newline) { call_rtas_display_status('\r'); call_rtas_display_status('\n'); - } + } pending_newline = 0; if (width--) { call_rtas_display_status(c); @@ -820,7 +918,7 @@ void rtas_progress(char *s, unsigned short hex) else rtas_call(display_character, 1, 1, NULL, '\r'); } - + if (row_width) width = row_width[current_line]; else @@ -840,9 +938,9 @@ void rtas_progress(char *s, unsigned short hex) spin_unlock(&progress_lock); return; } - + /* RTAS wants CR-LF, not just LF */ - + if (*os == '\n') { rtas_call(display_character, 1, 1, NULL, '\r'); rtas_call(display_character, 1, 1, NULL, '\n'); @@ -852,7 +950,7 @@ void rtas_progress(char *s, unsigned short hex) */ rtas_call(display_character, 1, 1, NULL, *os); } - + if (row_width) width = row_width[current_line]; else @@ -861,15 +959,15 @@ void rtas_progress(char *s, unsigned short hex) width--; rtas_call(display_character, 1, 1, NULL, *os); } - + os++; - + /* if we overwrite the screen length */ if (width <= 0) while ((*os != 0) && (*os != '\n') && (*os != '\r')) os++; } - + spin_unlock(&progress_lock); } EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */ @@ -900,11 +998,6 @@ int rtas_token(const char *service) } EXPORT_SYMBOL_GPL(rtas_token); -int rtas_service_present(const char *service) -{ - return rtas_token(service) != RTAS_UNKNOWN_SERVICE; -} - #ifdef CONFIG_RTAS_ERROR_LOGGING static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX; @@ -1638,10 +1731,14 @@ void rtas_activate_firmware(void) return; } + mutex_lock(&rtas_ibm_activate_firmware_lock); + do { fwrc = rtas_call(token, 0, 1, NULL); } while (rtas_busy_delay(fwrc)); + mutex_unlock(&rtas_ibm_activate_firmware_lock); + if (fwrc) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } @@ -1713,24 +1810,18 @@ static bool in_rmo_buf(u32 base, u32 end) end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); } -static bool block_rtas_call(int token, int nargs, +static bool block_rtas_call(const struct rtas_function *func, int nargs, struct rtas_args *args) { - const struct rtas_function *func; const struct rtas_filter *f; - const bool is_platform_dump = token == rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP); - const bool is_config_conn = token == rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR); + const bool is_platform_dump = + func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP]; + const bool is_config_conn = + func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR]; u32 base, size, end; /* - * If this token doesn't correspond to a function the kernel - * understands, you're not allowed to call it. - */ - func = rtas_token_to_function(token); - if (!func) - goto err; - /* - * And only functions with filters attached are allowed. + * Only functions with filters attached are allowed. */ f = func->filter; if (!f) @@ -1787,14 +1878,15 @@ static bool block_rtas_call(int token, int nargs, return false; err: pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); - pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", - token, nargs, current->comm); + pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n", + func->name, nargs, current->comm); return true; } /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { + const struct rtas_function *func; struct pin_cookie cookie; struct rtas_args args; unsigned long flags; @@ -1824,13 +1916,18 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) nargs * sizeof(rtas_arg_t)) != 0) return -EFAULT; - if (token == RTAS_UNKNOWN_SERVICE) + /* + * If this token doesn't correspond to a function the kernel + * understands, you're not allowed to call it. + */ + func = rtas_token_to_function_untrusted(token); + if (!func) return -EINVAL; args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); - if (block_rtas_call(token, nargs, &args)) + if (block_rtas_call(func, nargs, &args)) return -EINVAL; if (token_is_restricted_errinjct(token)) { @@ -1863,6 +1960,15 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) buff_copy = get_errorlog_buffer(); + /* + * If this function has a mutex assigned to it, we must + * acquire it to avoid interleaving with any kernel-based uses + * of the same function. Kernel-based sequences acquire the + * appropriate mutex explicitly. + */ + if (func->lock) + mutex_lock(func->lock); + raw_spin_lock_irqsave(&rtas_lock, flags); cookie = lockdep_pin_lock(&rtas_lock); @@ -1878,6 +1984,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) lockdep_unpin_lock(&rtas_lock, cookie); raw_spin_unlock_irqrestore(&rtas_lock, flags); + if (func->lock) + mutex_unlock(func->lock); + if (buff_copy) { if (errbuf) log_error(errbuf, ERR_TYPE_RTAS_LOG, 0); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index e1fdc7473b72..fccf96e897f6 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -43,7 +43,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where) return 0; } -int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val) { int returnval = -1; unsigned long buid, addr; @@ -87,7 +87,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, pdn = pci_get_pdn_by_devfn(bus, devfn); /* Validity of pdn is checked in here */ - ret = rtas_read_config(pdn, where, size, val); + ret = rtas_pci_dn_read_config(pdn, where, size, val); if (*val == EEH_IO_ERROR_VALUE(size) && eeh_dev_check_failure(pdn_to_eeh_dev(pdn))) return PCIBIOS_DEVICE_NOT_FOUND; @@ -95,7 +95,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, return ret; } -int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) +int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val) { unsigned long buid, addr; int ret; @@ -134,7 +134,7 @@ static int rtas_pci_write_config(struct pci_bus *bus, pdn = pci_get_pdn_by_devfn(bus, devfn); /* Validity of pdn is checked in here. */ - return rtas_write_config(pdn, where, size, val); + return rtas_pci_dn_write_config(pdn, where, size, val); } static struct pci_ops rtas_pci_ops = { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ab691c89d787..693334c20d07 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -77,10 +77,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; #endif struct task_struct *secondary_current; -bool has_big_cores; -bool coregroup_enabled; -bool thread_group_shares_l2; -bool thread_group_shares_l3; +bool has_big_cores __ro_after_init; +bool coregroup_enabled __ro_after_init; +bool thread_group_shares_l2 __ro_after_init; +bool thread_group_shares_l3 __ro_after_init; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -93,15 +93,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); EXPORT_SYMBOL_GPL(has_big_cores); -enum { -#ifdef CONFIG_SCHED_SMT - smt_idx, -#endif - cache_idx, - mc_idx, - die_idx, -}; - #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 #define THREAD_GROUP_SHARE_L2_L3 2 @@ -987,7 +978,7 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) return 0; } -static bool shared_caches; +static bool shared_caches __ro_after_init; #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymmetric SMT dependency */ @@ -1004,6 +995,13 @@ static int powerpc_smt_flags(void) #endif /* + * On shared processor LPARs scheduled on a big core (which has two or more + * independent thread groups per core), prefer lower numbered CPUs, so + * that workload consolidates to lesser number of cores. + */ +static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack); + +/* * P9 has a slightly odd architecture where pairs of cores share an L2 cache. * This topology makes it *much* cheaper to migrate tasks between adjacent cores * since the migrated task remains cache hot. We want to take advantage of this @@ -1011,9 +1009,20 @@ static int powerpc_smt_flags(void) */ static int powerpc_shared_cache_flags(void) { + if (static_branch_unlikely(&splpar_asym_pack)) + return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING; + return SD_SHARE_PKG_RESOURCES; } +static int powerpc_shared_proc_flags(void) +{ + if (static_branch_unlikely(&splpar_asym_pack)) + return SD_ASYM_PACKING; + + return 0; +} + /* * We can't just pass cpu_l2_cache_mask() directly because * returns a non-const pointer and the compiler barfs on that. @@ -1037,6 +1046,10 @@ static struct cpumask *cpu_coregroup_mask(int cpu) static bool has_coregroup_support(void) { + /* Coregroup identification not available on shared systems */ + if (is_shared_processor()) + return 0; + return coregroup_enabled; } @@ -1045,16 +1058,6 @@ static const struct cpumask *cpu_mc_mask(int cpu) return cpu_coregroup_mask(cpu); } -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, - { cpu_mc_mask, SD_INIT_NAME(MC) }, - { cpu_cpu_mask, SD_INIT_NAME(PKG) }, - { NULL, }, -}; - static int __init init_big_cores(void) { int cpu; @@ -1682,43 +1685,45 @@ void start_secondary(void *unused) BUG(); } -static void __init fixup_topology(void) +static struct sched_domain_topology_level powerpc_topology[6]; + +static void __init build_sched_topology(void) { - int i; + int i = 0; + + if (is_shared_processor() && has_big_cores) + static_branch_enable(&splpar_asym_pack); #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); - powerpc_topology[smt_idx].mask = smallcore_smt_mask; + powerpc_topology[i++] = (struct sched_domain_topology_level){ + smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) + }; + } else { + powerpc_topology[i++] = (struct sched_domain_topology_level){ + cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) + }; } #endif + if (shared_caches) { + powerpc_topology[i++] = (struct sched_domain_topology_level){ + shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) + }; + } + if (has_coregroup_support()) { + powerpc_topology[i++] = (struct sched_domain_topology_level){ + cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) + }; + } + powerpc_topology[i++] = (struct sched_domain_topology_level){ + cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG) + }; - if (!has_coregroup_support()) - powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; - - /* - * Try to consolidate topology levels here instead of - * allowing scheduler to degenerate. - * - Dont consolidate if masks are different. - * - Dont consolidate if sd_flags exists and are different. - */ - for (i = 1; i <= die_idx; i++) { - if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask) - continue; - - if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags && - powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags) - continue; - - if (!powerpc_topology[i - 1].sd_flags) - powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags; + /* There must be one trailing NULL entry left. */ + BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1); - powerpc_topology[i].mask = powerpc_topology[i + 1].mask; - powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags; -#ifdef CONFIG_SCHED_DEBUG - powerpc_topology[i].name = powerpc_topology[i + 1].name; -#endif - } + set_sched_topology(powerpc_topology); } void __init smp_cpus_done(unsigned int max_cpus) @@ -1733,9 +1738,20 @@ void __init smp_cpus_done(unsigned int max_cpus) smp_ops->bringup_done(); dump_numa_cpu_topology(); + build_sched_topology(); +} - fixup_topology(); - set_sched_topology(powerpc_topology); +/* + * For asym packing, by default lower numbered CPU has higher priority. + * On shared processors, pack to lower numbered core. However avoid moving + * between thread_groups within the same core. + */ +int arch_asym_cpu_priority(int cpu) +{ + if (static_branch_unlikely(&splpar_asym_pack)) + return -cpu / threads_per_core; + + return -cpu; } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c index 16ee3baaf09a..50fa8fc9ef95 100644 --- a/arch/powerpc/kernel/swsusp_64.c +++ b/arch/powerpc/kernel/swsusp_64.c @@ -11,6 +11,8 @@ #include <linux/interrupt.h> #include <linux/nmi.h> +void do_after_copyback(void); + void do_after_copyback(void) { iommu_restore(); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 7fab411378f2..6988ecbc316e 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -543,3 +543,5 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 90701885762c..76dbe9fd2c0f 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -62,7 +62,7 @@ .endif /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE + addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE PPC_STL r8, GPR1(r1) .if \allregs == 1 @@ -162,7 +162,6 @@ _GLOBAL(ftrace_regs_caller) .globl ftrace_regs_call ftrace_regs_call: bl ftrace_stub - nop ftrace_regs_exit 1 _GLOBAL(ftrace_caller) @@ -171,7 +170,6 @@ _GLOBAL(ftrace_caller) .globl ftrace_call ftrace_call: bl ftrace_stub - nop ftrace_regs_exit 0 _GLOBAL(ftrace_stub) @@ -182,7 +180,7 @@ ftrace_no_trace: mflr r3 mtctr r3 REST_GPR(3, r1) - addi r1, r1, SWITCH_FRAME_SIZE + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE mtlr r0 bctr #endif diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5ea2014aff90..11e062b47d3f 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1439,10 +1439,12 @@ static int emulate_instruction(struct pt_regs *regs) return -EINVAL; } +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { return is_kernel_addr(addr); } +#endif #ifdef CONFIG_MATH_EMULATION static int emulate_math(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 74ddf836f7a2..a0467e528b70 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include <asm/udbg.h> #include <asm/io.h> -#include <asm/reg_a2.h> #include <asm/early_ioremap.h> extern u8 real_readb(volatile u8 __iomem *addr); diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 0c7d82c270c3..1b93655c2857 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -71,7 +71,7 @@ AS64FLAGS := -D__VDSO64__ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +CPPFLAGS_vdso64.lds += -P -C # link rule for the .so file, .lds has to be first $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 4094e4c4c77a..80b3f6e476b6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -33,6 +33,7 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + lvx v0, 0, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -109,6 +110,7 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + lvx v0,0,r7 blr #ifdef CONFIG_VSX diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 85846cadb9b5..27fa9098a5b7 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -75,6 +75,7 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_OFFSET(mmu_psize_def, shift); #endif VMCOREINFO_SYMBOL(cur_cpu_spec); + VMCOREINFO_OFFSET(cpu_spec, cpu_features); VMCOREINFO_OFFSET(cpu_spec, mmu_features); vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled()); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 6cd20ab9e94e..8acec144120e 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -302,11 +302,11 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, switch (priority) { case BOOK3S_IRQPRIO_DECREMENTER: - deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; + deliver = !kvmhv_is_nestedv2() && (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_DECREMENTER; break; case BOOK3S_IRQPRIO_EXTERNAL: - deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; + deliver = !kvmhv_is_nestedv2() && (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_EXTERNAL; break; case BOOK3S_IRQPRIO_SYSTEM_RESET: diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 175a8eb2681f..4a1abb9f7c05 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -40,6 +40,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, unsigned long quadrant, ret = n; bool is_load = !!to; + if (kvmhv_is_nestedv2()) + return H_UNSUPPORTED; + /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ if (kvmhv_on_pseries()) return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, @@ -97,7 +100,7 @@ static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, void *from, unsigned long n) { int lpid = vcpu->kvm->arch.lpid; - int pid = kvmppc_get_pid(vcpu); + int pid; /* This would cause a data segment intr so don't allow the access */ if (eaddr & (0x3FFUL << 52)) @@ -110,6 +113,8 @@ static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, /* If accessing quadrant 3 then pid is expected to be 0 */ if (((eaddr >> 62) & 0x3) == 0x3) pid = 0; + else + pid = kvmppc_get_pid(vcpu); eaddr &= ~(0xFFFUL << 52); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1ed6ec140701..e48126a59ba7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -650,7 +650,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, return err; } -static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) +static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap, + struct kvmppc_vpa *old_vpap) { struct kvm *kvm = vcpu->kvm; void *va; @@ -690,9 +691,8 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) kvmppc_unpin_guest_page(kvm, va, gpa, false); va = NULL; } - if (vpap->pinned_addr) - kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, - vpap->dirty); + *old_vpap = *vpap; + vpap->gpa = gpa; vpap->pinned_addr = va; vpap->dirty = false; @@ -702,6 +702,9 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; + struct kvmppc_vpa old_vpa = { 0 }; + if (!(vcpu->arch.vpa.update_pending || vcpu->arch.slb_shadow.update_pending || vcpu->arch.dtl.update_pending)) @@ -709,17 +712,34 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vpa_update_lock); if (vcpu->arch.vpa.update_pending) { - kvmppc_update_vpa(vcpu, &vcpu->arch.vpa); - if (vcpu->arch.vpa.pinned_addr) + kvmppc_update_vpa(vcpu, &vcpu->arch.vpa, &old_vpa); + if (old_vpa.pinned_addr) { + if (kvmhv_is_nestedv2()) + kvmhv_nestedv2_set_vpa(vcpu, ~0ull); + kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa, + old_vpa.dirty); + } + if (vcpu->arch.vpa.pinned_addr) { init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); + if (kvmhv_is_nestedv2()) + kvmhv_nestedv2_set_vpa(vcpu, __pa(vcpu->arch.vpa.pinned_addr)); + } } if (vcpu->arch.dtl.update_pending) { - kvmppc_update_vpa(vcpu, &vcpu->arch.dtl); + kvmppc_update_vpa(vcpu, &vcpu->arch.dtl, &old_vpa); + if (old_vpa.pinned_addr) + kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa, + old_vpa.dirty); vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr; vcpu->arch.dtl_index = 0; } - if (vcpu->arch.slb_shadow.update_pending) - kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow); + if (vcpu->arch.slb_shadow.update_pending) { + kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow, &old_vpa); + if (old_vpa.pinned_addr) + kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa, + old_vpa.dirty); + } + spin_unlock(&vcpu->arch.vpa_update_lock); } @@ -1597,7 +1617,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * That can happen due to a bug, or due to a machine check * occurring at just the wrong time. */ - if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) { + if (!kvmhv_is_nestedv2() && (__kvmppc_get_msr_hv(vcpu) & MSR_HV)) { printk(KERN_EMERG "KVM trap in HV mode!\n"); printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", vcpu->arch.trap, kvmppc_get_pc(vcpu), @@ -1688,7 +1708,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, { int i; - if (unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { + if (!kvmhv_is_nestedv2() && unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { /* * Guest userspace executed sc 1. This can only be * reached by the P9 path because the old path @@ -4084,6 +4104,8 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit, if (rc < 0) return -EINVAL; + kvmppc_gse_put_u64(io->vcpu_run_input, KVMPPC_GSID_LPCR, lpcr); + accumulate_time(vcpu, &vcpu->arch.in_guest); rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id, &trap, &i); @@ -4736,13 +4758,19 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (!nested) { kvmppc_core_prepare_to_enter(vcpu); - if (__kvmppc_get_msr_hv(vcpu) & MSR_EE) { - if (xive_interrupt_pending(vcpu)) + if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, + &vcpu->arch.pending_exceptions) || + xive_interrupt_pending(vcpu)) { + /* + * For nested HV, don't synthesize but always pass MER, + * the L0 will be able to optimise that more + * effectively than manipulating registers directly. + */ + if (!kvmhv_on_pseries() && (__kvmppc_get_msr_hv(vcpu) & MSR_EE)) kvmppc_inject_interrupt_hv(vcpu, - BOOK3S_INTERRUPT_EXTERNAL, 0); - } else if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, - &vcpu->arch.pending_exceptions)) { - lpcr |= LPCR_MER; + BOOK3S_INTERRUPT_EXTERNAL, 0); + else + lpcr |= LPCR_MER; } } else if (vcpu->arch.pending_exceptions || vcpu->arch.doorbell_request || @@ -4806,7 +4834,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, * entering a nested guest in which case the decrementer is now owned * by L2 and the L1 decrementer is provided in hdec_expires */ - if (kvmppc_core_pending_dec(vcpu) && + if (!kvmhv_is_nestedv2() && kvmppc_core_pending_dec(vcpu) && ((tb < kvmppc_dec_expires_host_tb(vcpu)) || (trap == BOOK3S_INTERRUPT_SYSCALL && kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) @@ -4949,7 +4977,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL) { accumulate_time(vcpu, &vcpu->arch.hcall); - if (WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { + if (!kvmhv_is_nestedv2() && WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { /* * These should have been caught reflected * into the guest by now. Final sanity check: @@ -5691,10 +5719,12 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); } - if (kvmhv_is_nestedv2()) + if (kvmhv_is_nestedv2()) { + kvmhv_flush_lpid(kvm->arch.lpid); plpar_guest_delete(0, kvm->arch.lpid); - else + } else { kvmppc_free_lpid(kvm->arch.lpid); + } kvmppc_free_pimap(kvm); } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 3b658b8696bc..5c375ec1a3c6 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -503,7 +503,7 @@ void kvmhv_nested_exit(void) } } -static void kvmhv_flush_lpid(u64 lpid) +void kvmhv_flush_lpid(u64 lpid) { long rc; diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index fd3c4f2d9480..5378eb40b162 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -856,6 +856,35 @@ free_gsb: EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_ptbl_entry); /** + * kvmhv_nestedv2_set_vpa() - register L2 VPA with L0 + * @vcpu: vcpu + * @vpa: L1 logical real address + */ +int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa) +{ + struct kvmhv_nestedv2_io *io; + struct kvmppc_gs_buff *gsb; + int rc = 0; + + io = &vcpu->arch.nestedv2_io; + gsb = io->vcpu_run_input; + + kvmppc_gsb_reset(gsb); + rc = kvmppc_gse_put_u64(gsb, KVMPPC_GSID_VPA, vpa); + if (rc < 0) + goto out; + + rc = kvmppc_gsb_send(gsb, 0); + if (rc < 0) + pr_err("KVM-NESTEDv2: couldn't register the L2 VPA (rc=%d)\n", rc); + +out: + kvmppc_gsb_reset(gsb); + return rc; +} +EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_vpa); + +/** * kvmhv_nestedv2_parse_output() - receive values from H_GUEST_RUN_VCPU output * @vcpu: vcpu * diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9118242063fb..5b92619a05fd 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -604,6 +604,7 @@ static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: + case PVR_HX_C2000: case PVR_POWER9: vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | BOOK3S_HFLAG_NEW_TLBIE; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 077fd88a0b68..ec60c7979718 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -93,7 +93,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = EMULATE_FAIL; vcpu->arch.regs.msr = kvmppc_get_msr(vcpu); - kvmhv_nestedv2_reload_ptregs(vcpu, &vcpu->arch.regs); if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); @@ -112,7 +111,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) op.reg, size, !instr_byte_swap); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) - kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed); break; } @@ -132,7 +131,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) KVM_MMIO_REG_FPR|op.reg, size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) - kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed); break; #endif @@ -224,16 +223,17 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) break; } #endif - case STORE: - /* if need byte reverse, op.val has been reversed by - * analyse_instr(). - */ - emulated = kvmppc_handle_store(vcpu, op.val, size, 1); + case STORE: { + int instr_byte_swap = op.type & BYTEREV; + + emulated = kvmppc_handle_store(vcpu, kvmppc_get_gpr(vcpu, op.reg), + size, !instr_byte_swap); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) - kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed); break; + } #ifdef CONFIG_PPC_FPU case STORE_FP: if (kvmppc_check_fp_disabled(vcpu)) @@ -254,7 +254,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) kvmppc_get_fpr(vcpu, op.reg), size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) - kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed); break; #endif @@ -358,7 +358,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } trace_kvm_ppc_instr(ppc_inst_val(inst), kvmppc_get_pc(vcpu), emulated); - kvmhv_nestedv2_mark_dirty_ptregs(vcpu, &vcpu->arch.regs); /* Advance past emulated instruction. */ if (emulated != EMULATE_FAIL) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 51ad0397c17a..6eac63e79a89 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -45,7 +45,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o # so it is only needed for modules, and only for older linkers which # do not support --save-restore-funcs ifndef CONFIG_LD_IS_BFD -extra-$(CONFIG_PPC64) += crtsavres.o +always-$(CONFIG_PPC64) += crtsavres.o endif obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a4ab8625061a..5766180f5380 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -586,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -636,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -680,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u = {}; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -688,7 +695,7 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, if (err) return err; if (unlikely(cross_endian)) - do_byte_reverse(&u.b[ea & 0xf], size); + do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u))); preempt_disable(); if (regs->msr & MSR_VEC) put_vr(rn, &u.v); @@ -707,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -719,7 +729,7 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u.v = current->thread.vr_state.vr[rn]; preempt_enable(); if (unlikely(cross_endian)) - do_byte_reverse(&u.b[ea & 0xf], size); + do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u))); return copy_mem_out(&u.b[ea & 0xf], ea, size, regs); } #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index ad2afa08e62e..0626a25b0d72 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -310,9 +310,16 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags else rflags |= 0x3; } + VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request"); } else { if (pteflags & _PAGE_RWX) rflags |= 0x2; + /* + * We should never hit this in normal fault handling because + * a permission check (check_pte_access()) will bubble this + * to higher level linux handler even for PAGE_NONE. + */ + VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request"); if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY))) rflags |= 0x1; } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index be229290a6a7..3438ab72c346 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -542,6 +542,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, set_pte_at(vma->vm_mm, addr, ptep, pte); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * For hash translation mode, we use the deposited table to store hash slot * information and they are stored at PTRS_PER_PMD offset from related pmd @@ -563,6 +564,7 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, return true; } +#endif /* * Does the CPU support tlbie? diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 125733962033..a974baf8f327 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -89,7 +89,8 @@ static int __init scan_pkey_feature(void) unsigned long pvr = mfspr(SPRN_PVR); if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || - PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9) + PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9 || + PVR_VER(pvr) == PVR_HX_C2000) pkeys_total = 32; } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 9e49ede2bc1c..53335ae21a40 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -497,6 +497,8 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; if (fault_signal_pending(fault, regs)) return user_mode(regs) ? 0 : SIGBUS; diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 119ef491f797..d3a7726ecf51 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -126,7 +126,7 @@ void pgtable_cache_add(unsigned int shift) * as to leave enough 0 bits in the address to contain it. */ unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; + struct kmem_cache *new = NULL; /* It would be nice if this was a BUILD_BUG_ON(), but at the * moment, gcc doesn't seem to recognize is_power_of_2 as a @@ -139,7 +139,8 @@ void pgtable_cache_add(unsigned int shift) align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); + if (name) + new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); if (!new) panic("Could not allocate pgtable cache for order %d", shift); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 7f9ff0640124..72341b9fb552 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -181,3 +181,8 @@ static inline bool debug_pagealloc_enabled_or_kfence(void) { return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); } + +#ifdef CONFIG_MEMORY_HOTPLUG +int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); +#endif diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 39dbe6b348df..27f18119fda1 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -534,6 +534,9 @@ static ssize_t affinity_domain_via_partition_show(struct device *dev, struct dev if (!ret) goto parse_result; + if (ret && (ret != H_PARAMETER)) + goto out; + /* * ret value as 'H_PARAMETER' implies that the current buffer size * can't accommodate all the information, and a partial buffer diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 5d12ca386c1f..8664a7d297ad 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -299,6 +299,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) attr_group->attrs = attrs; do { ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value); + if (!ev_val_str) + continue; dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str); if (!dev_str) continue; @@ -306,6 +308,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) attrs[j++] = dev_str; if (pmu->events[i].scale) { ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name); + if (!ev_scale_str) + continue; dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale); if (!dev_str) continue; @@ -315,6 +319,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) if (pmu->events[i].unit) { ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name); + if (!ev_unit_str) + continue; dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit); if (!dev_str) continue; diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 1624ebf95497..35a1f4b9f827 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -173,6 +173,7 @@ config ISS4xx config CURRITUCK bool "IBM Currituck (476fpe) Support" depends on PPC_47x + select I2C select SWIOTLB select 476FPE select FORCE_PCI diff --git a/arch/powerpc/platforms/44x/idle.c b/arch/powerpc/platforms/44x/idle.c index f533b495e7db..e2eeef8dff78 100644 --- a/arch/powerpc/platforms/44x/idle.c +++ b/arch/powerpc/platforms/44x/idle.c @@ -27,7 +27,7 @@ static void ppc44x_idle(void) isync(); } -int __init ppc44x_idle_init(void) +static int __init ppc44x_idle_init(void) { if (!mode_spin) { /* If we are not setting spin mode diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index 6f08d07aee3b..e995eb30bf09 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -17,6 +17,8 @@ #include <linux/of_address.h> #include <linux/of_irq.h> +#include "mpc5121_ads.h" + static struct device_node *cpld_pic_node; static struct irq_domain *cpld_pic_host; diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c index ce51cfeeb066..8bbbf78bb42b 100644 --- a/arch/powerpc/platforms/512x/pdm360ng.c +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -101,7 +101,7 @@ static inline void __init pdm360ng_touchscreen_init(void) } #endif /* CONFIG_TOUCHSCREEN_ADS7846 */ -void __init pdm360ng_init(void) +static void __init pdm360ng_init(void) { mpc512x_init(); pdm360ng_touchscreen_init(); diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 9833c36bda83..c9664e46b03d 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -261,9 +261,10 @@ static int mpc83xx_suspend_begin(suspend_state_t state) static int agent_thread_fn(void *data) { + set_freezable(); + while (1) { - wait_event_interruptible(agent_wq, pci_pm_state >= 2); - try_to_freeze(); + wait_event_freezable(agent_wq, pci_pm_state >= 2); if (signal_pending(current) || pci_pm_state < 2) continue; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index ec9f60fbebc7..e0cec670d8db 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -76,7 +76,7 @@ static void __init mpc85xx_rdb_setup_arch(void) /* P1025 has pins muxed for QE and other functions. To * enable QE UEC mode, we need to set bit QE0 for UCC1 * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9 - * and QE12 for QE MII management singals in PMUXCR + * and QE12 for QE MII management signals in PMUXCR * register. */ setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 67467cd6f34c..06b1e5c49d6f 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -52,10 +52,3 @@ config MPC8641 select MPIC default y if GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \ || MVME7100 - -config MPC8610 - bool - select HAVE_PCI - select FSL_PCI if PCI - select PPC_UDBG_16550 - select MPIC diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index ef985ba2bf21..0761d98e5be3 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -64,7 +64,7 @@ static void __noreturn pas_restart(char *cmd) } #ifdef CONFIG_PPC_PASEMI_NEMO -void pas_shutdown(void) +static void pas_shutdown(void) { /* Set the PLD bit that makes the SB600 think the power button is being pressed */ void __iomem *pld_map = ioremap(0xf5000000,4096); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index c83d1e14077e..15644be31990 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -413,7 +413,7 @@ static void __init smp_psurge_setup_cpu(int cpu_nr) printk(KERN_ERR "Couldn't get primary IPI interrupt"); } -void __init smp_psurge_take_timebase(void) +static void __init smp_psurge_take_timebase(void) { if (psurge_type != PSURGE_DUAL) return; @@ -429,7 +429,7 @@ void __init smp_psurge_take_timebase(void) set_dec(tb_ticks_per_jiffy/2); } -void __init smp_psurge_give_timebase(void) +static void __init smp_psurge_give_timebase(void) { /* Nothing to do here */ } diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index f9a7001dacb7..56a1f7ce78d2 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -275,6 +275,8 @@ int __init opal_event_init(void) else name = kasprintf(GFP_KERNEL, "opal"); + if (!name) + continue; /* Install interrupt handler */ rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK, name, NULL); diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c index 7bfe4cbeb35a..ea917266aa17 100644 --- a/arch/powerpc/platforms/powernv/opal-powercap.c +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -196,6 +196,12 @@ void __init opal_powercap_init(void) j = 0; pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node); + if (!pcaps[i].pg.name) { + kfree(pcaps[i].pattrs); + kfree(pcaps[i].pg.attrs); + goto out_pcaps_pattrs; + } + if (has_min) { powercap_add_attr(min, "powercap-min", &pcaps[i].pattrs[j]); diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 327e2f76905d..b66b06efcef1 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -66,6 +66,8 @@ static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size) const char *label; addrp = of_get_address(node, 0, &range_size, NULL); + if (!addrp) + continue; range_addr = of_read_number(addrp, 2); range_end = range_addr + range_size; diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 262cd6fac907..748c2b97fa53 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -165,6 +165,11 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn, ent->chip = chip; snprintf(ent->name, 16, "%08x", chip); ent->path.data = (void *)kasprintf(GFP_KERNEL, "%pOF", dn); + if (!ent->path.data) { + kfree(ent); + return -ENOMEM; + } + ent->path.size = strlen((char *)ent->path.data); dir = debugfs_create_dir(ent->name, root); diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 191424468f10..393e747541fb 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -425,7 +425,8 @@ static int subcore_init(void) if (pvr_ver != PVR_POWER8 && pvr_ver != PVR_POWER8E && - pvr_ver != PVR_POWER8NVL) + pvr_ver != PVR_POWER8NVL && + pvr_ver != PVR_HX_C2000) return 0; /* diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index a44869e5ea70..e9c1087dd42e 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -167,16 +167,4 @@ config PS3_LPM profiling support of the Cell processor with programs like perfmon2, then say Y or M, otherwise say N. -config PS3GELIC_UDBG - bool "PS3 udbg output via UDP broadcasts on Ethernet" - depends on PPC_PS3 - help - Enables udbg early debugging output by sending broadcast UDP - via the Ethernet port (UDP port number 18194). - - This driver uses a trivial implementation and is independent - from the main PS3 gelic network driver. - - If in doubt, say N here. - endmenu diff --git a/arch/powerpc/platforms/ps3/Makefile b/arch/powerpc/platforms/ps3/Makefile index 86bf2967a8d4..bc79bb124d1e 100644 --- a/arch/powerpc/platforms/ps3/Makefile +++ b/arch/powerpc/platforms/ps3/Makefile @@ -3,7 +3,7 @@ obj-y += setup.o mm.o time.o hvcall.o htab.o repository.o obj-y += interrupt.o exports.o os-area.o obj-y += system-bus.o -obj-$(CONFIG_PS3GELIC_UDBG) += gelic_udbg.o +obj-$(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) += gelic_udbg.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SPU_BASE) += spu.o obj-y += device-init.o diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index e87360a0fb40..878bc160246e 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -827,6 +827,7 @@ static int ps3_probe_thread(void *data) if (res) goto fail_free_irq; + set_freezable(); /* Loop here processing the requested notification events. */ do { try_to_freeze(); diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c index 6b298010fd84..a5202c18c236 100644 --- a/arch/powerpc/platforms/ps3/gelic_udbg.c +++ b/arch/powerpc/platforms/ps3/gelic_udbg.c @@ -14,6 +14,7 @@ #include <linux/ip.h> #include <linux/udp.h> +#include <asm/ps3.h> #include <asm/io.h> #include <asm/udbg.h> #include <asm/lv1call.h> diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 1476c5e4433c..f936962a2946 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ of_helpers.o rtas-work-area.o papr-sysparm.o \ + papr-vpd.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o \ diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index def184da51cf..b1ae0c0d1187 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -252,7 +252,7 @@ static int pseries_eeh_cap_start(struct pci_dn *pdn) if (!pdn) return 0; - rtas_read_config(pdn, PCI_STATUS, 2, &status); + rtas_pci_dn_read_config(pdn, PCI_STATUS, 2, &status); if (!(status & PCI_STATUS_CAP_LIST)) return 0; @@ -270,11 +270,11 @@ static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap) return 0; while (cnt--) { - rtas_read_config(pdn, pos, 1, &pos); + rtas_pci_dn_read_config(pdn, pos, 1, &pos); if (pos < 0x40) break; pos &= ~3; - rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + rtas_pci_dn_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id); if (id == 0xff) break; if (id == cap) @@ -294,7 +294,7 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) if (!edev || !edev->pcie_cap) return 0; - if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + if (rtas_pci_dn_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) return 0; else if (!header) return 0; @@ -307,7 +307,7 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) if (pos < 256) break; - if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + if (rtas_pci_dn_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) break; } @@ -412,8 +412,8 @@ static void pseries_eeh_init_edev(struct pci_dn *pdn) if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { - rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, - 2, &pcie_flags); + rtas_pci_dn_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) edev->mode |= EEH_DEV_ROOT_PORT; @@ -676,7 +676,7 @@ static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u3 { struct pci_dn *pdn = eeh_dev_to_pdn(edev); - return rtas_read_config(pdn, where, size, val); + return rtas_pci_dn_read_config(pdn, where, size, val); } /** @@ -692,7 +692,7 @@ static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u { struct pci_dn *pdn = eeh_dev_to_pdn(edev); - return rtas_write_config(pdn, where, size, val); + return rtas_pci_dn_write_config(pdn, where, size, val); } #ifdef CONFIG_PCI_IOV diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index a43bfb01720a..3fe3ddb30c04 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -208,8 +208,10 @@ static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online) int rc; mem_block = lmb_to_memblock(lmb); - if (!mem_block) + if (!mem_block) { + pr_err("Failed memory block lookup for LMB 0x%x\n", lmb->drc_index); return -EINVAL; + } if (online && mem_block->dev.offline) rc = device_online(&mem_block->dev); @@ -436,14 +438,15 @@ static int dlpar_memory_remove_by_index(u32 drc_index) } } - if (!lmb_found) + if (!lmb_found) { + pr_debug("Failed to look up LMB for drc index %x\n", drc_index); rc = -EINVAL; - - if (rc) + } else if (rc) { pr_debug("Failed to hot-remove memory at %llx\n", lmb->base_addr); - else + } else { pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr); + } return rc; } @@ -575,6 +578,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = update_lmb_associativity_index(lmb); if (rc) { dlpar_release_drc(lmb->drc_index); + pr_err("Failed to configure LMB 0x%x\n", lmb->drc_index); return rc; } @@ -588,12 +592,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) /* Add the memory */ rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY); if (rc) { + pr_err("Failed to add LMB 0x%x to node %u", lmb->drc_index, nid); invalidate_lmb_associativity_index(lmb); return rc; } rc = dlpar_online_lmb(lmb); if (rc) { + pr_err("Failed to online LMB 0x%x on node %u\n", lmb->drc_index, nid); __remove_memory(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); } else { diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c index fedc61599e6c..7063ce8884e4 100644 --- a/arch/powerpc/platforms/pseries/papr-sysparm.c +++ b/arch/powerpc/platforms/pseries/papr-sysparm.c @@ -2,14 +2,20 @@ #define pr_fmt(fmt) "papr-sysparm: " fmt +#include <linux/anon_inodes.h> #include <linux/bug.h> +#include <linux/file.h> +#include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/miscdevice.h> #include <linux/printk.h> #include <linux/slab.h> -#include <asm/rtas.h> +#include <linux/uaccess.h> +#include <asm/machdep.h> #include <asm/papr-sysparm.h> #include <asm/rtas-work-area.h> +#include <asm/rtas.h> struct papr_sysparm_buf *papr_sysparm_buf_alloc(void) { @@ -23,6 +29,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf) kfree(buf); } +static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf) +{ + return be16_to_cpu(buf->len); +} + +static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t length) +{ + WARN_ONCE(length > sizeof(buf->val), + "bogus length %zu, clamping to safe value", length); + length = min(sizeof(buf->val), length); + buf->len = cpu_to_be16(length); +} + +/* + * For use on buffers returned from ibm,get-system-parameter before + * returning them to callers. Ensures the encoded length of valid data + * cannot overrun buf->val[]. + */ +static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf) +{ + papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf)); +} + +/* + * Perform some basic diligence on the system parameter buffer before + * submitting it to RTAS. + */ +static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf) +{ + /* + * Firmware ought to reject buffer lengths that exceed the + * maximum specified in PAPR, but there's no reason for the + * kernel to allow them either. + */ + if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val)) + return false; + + return true; +} + /** * papr_sysparm_get() - Retrieve the value of a PAPR system parameter. * @param: PAPR system parameter token as described in @@ -47,7 +93,6 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf) * * Return: 0 on success, -errno otherwise. @buf is unmodified on error. */ - int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) { const s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER); @@ -63,6 +108,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; + if (!papr_sysparm_buf_can_submit(buf)) + return -EINVAL; + work_area = rtas_work_area_alloc(sizeof(*buf)); memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); @@ -77,6 +125,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) case 0: ret = 0; memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf)); + papr_sysparm_buf_clamp_length(buf); break; case -3: /* parameter not implemented */ ret = -EOPNOTSUPP; @@ -115,6 +164,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; + if (!papr_sysparm_buf_can_submit(buf)) + return -EINVAL; + work_area = rtas_work_area_alloc(sizeof(*buf)); memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); @@ -149,3 +201,152 @@ int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) return ret; } + +static struct papr_sysparm_buf * +papr_sysparm_buf_from_user(const struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + long err; + u16 len; + + /* + * The length of valid data that userspace claims to be in + * user_iob->data[]. + */ + if (get_user(len, &user_iob->length)) + return ERR_PTR(-EFAULT); + + static_assert(sizeof(user_iob->data) >= PAPR_SYSPARM_MAX_INPUT); + static_assert(sizeof(kern_spbuf->val) >= PAPR_SYSPARM_MAX_INPUT); + + if (len > PAPR_SYSPARM_MAX_INPUT) + return ERR_PTR(-EINVAL); + + kern_spbuf = papr_sysparm_buf_alloc(); + if (!kern_spbuf) + return ERR_PTR(-ENOMEM); + + papr_sysparm_buf_set_length(kern_spbuf, len); + + if (len > 0 && copy_from_user(kern_spbuf->val, user_iob->data, len)) { + err = -EFAULT; + goto free_sysparm_buf; + } + + return kern_spbuf; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ERR_PTR(err); +} + +static int papr_sysparm_buf_to_user(const struct papr_sysparm_buf *kern_spbuf, + struct papr_sysparm_io_block __user *user_iob) +{ + u16 len_out = papr_sysparm_buf_get_length(kern_spbuf); + + if (put_user(len_out, &user_iob->length)) + return -EFAULT; + + static_assert(sizeof(user_iob->data) >= PAPR_SYSPARM_MAX_OUTPUT); + static_assert(sizeof(kern_spbuf->val) >= PAPR_SYSPARM_MAX_OUTPUT); + + if (copy_to_user(user_iob->data, kern_spbuf->val, PAPR_SYSPARM_MAX_OUTPUT)) + return -EFAULT; + + return 0; +} + +static long papr_sysparm_ioctl_get(struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + papr_sysparm_t param; + long ret; + + if (get_user(param.token, &user_iob->parameter)) + return -EFAULT; + + kern_spbuf = papr_sysparm_buf_from_user(user_iob); + if (IS_ERR(kern_spbuf)) + return PTR_ERR(kern_spbuf); + + ret = papr_sysparm_get(param, kern_spbuf); + if (ret) + goto free_sysparm_buf; + + ret = papr_sysparm_buf_to_user(kern_spbuf, user_iob); + if (ret) + goto free_sysparm_buf; + + ret = 0; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ret; +} + + +static long papr_sysparm_ioctl_set(struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + papr_sysparm_t param; + long ret; + + if (get_user(param.token, &user_iob->parameter)) + return -EFAULT; + + kern_spbuf = papr_sysparm_buf_from_user(user_iob); + if (IS_ERR(kern_spbuf)) + return PTR_ERR(kern_spbuf); + + ret = papr_sysparm_set(param, kern_spbuf); + if (ret) + goto free_sysparm_buf; + + ret = 0; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ret; +} + +static long papr_sysparm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (__force void __user *)arg; + long ret; + + switch (ioctl) { + case PAPR_SYSPARM_IOC_GET: + ret = papr_sysparm_ioctl_get(argp); + break; + case PAPR_SYSPARM_IOC_SET: + if (filp->f_mode & FMODE_WRITE) + ret = papr_sysparm_ioctl_set(argp); + else + ret = -EBADF; + break; + default: + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + +static const struct file_operations papr_sysparm_ops = { + .unlocked_ioctl = papr_sysparm_ioctl, +}; + +static struct miscdevice papr_sysparm_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "papr-sysparm", + .fops = &papr_sysparm_ops, +}; + +static __init int papr_sysparm_init(void) +{ + if (!rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER)) + return -ENODEV; + + return misc_register(&papr_sysparm_dev); +} +machine_device_initcall(pseries, papr_sysparm_init); diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c new file mode 100644 index 000000000000..c29e85db5f35 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr-vpd.c @@ -0,0 +1,541 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "papr-vpd: " fmt + +#include <linux/anon_inodes.h> +#include <linux/build_bug.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/lockdep.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/string_helpers.h> +#include <linux/uaccess.h> +#include <asm/machdep.h> +#include <asm/papr-vpd.h> +#include <asm/rtas-work-area.h> +#include <asm/rtas.h> +#include <uapi/asm/papr-vpd.h> + +/* + * Function-specific return values for ibm,get-vpd, derived from PAPR+ + * v2.13 7.3.20 "ibm,get-vpd RTAS Call". + */ +#define RTAS_IBM_GET_VPD_COMPLETE 0 /* All VPD has been retrieved. */ +#define RTAS_IBM_GET_VPD_MORE_DATA 1 /* More VPD is available. */ +#define RTAS_IBM_GET_VPD_START_OVER -4 /* VPD changed, restart call sequence. */ + +/** + * struct rtas_ibm_get_vpd_params - Parameters (in and out) for ibm,get-vpd. + * @loc_code: In: Caller-provided location code buffer. Must be RTAS-addressable. + * @work_area: In: Caller-provided work area buffer for results. + * @sequence: In: Sequence number. Out: Next sequence number. + * @written: Out: Bytes written by ibm,get-vpd to @work_area. + * @status: Out: RTAS call status. + */ +struct rtas_ibm_get_vpd_params { + const struct papr_location_code *loc_code; + struct rtas_work_area *work_area; + u32 sequence; + u32 written; + s32 status; +}; + +/** + * rtas_ibm_get_vpd() - Call ibm,get-vpd to fill a work area buffer. + * @params: See &struct rtas_ibm_get_vpd_params. + * + * Calls ibm,get-vpd until it errors or successfully deposits data + * into the supplied work area. Handles RTAS retry statuses. Maps RTAS + * error statuses to reasonable errno values. + * + * The caller is expected to invoke rtas_ibm_get_vpd() multiple times + * to retrieve all the VPD for the provided location code. Only one + * sequence should be in progress at any time; starting a new sequence + * will disrupt any sequence already in progress. Serialization of VPD + * retrieval sequences is the responsibility of the caller. + * + * The caller should inspect @params.status to determine whether more + * calls are needed to complete the sequence. + * + * Context: May sleep. + * Return: -ve on error, 0 otherwise. + */ +static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params) +{ + const struct papr_location_code *loc_code = params->loc_code; + struct rtas_work_area *work_area = params->work_area; + u32 rets[2]; + s32 fwrc; + int ret; + + lockdep_assert_held(&rtas_ibm_get_vpd_lock); + + do { + fwrc = rtas_call(rtas_function_token(RTAS_FN_IBM_GET_VPD), 4, 3, + rets, + __pa(loc_code), + rtas_work_area_phys(work_area), + rtas_work_area_size(work_area), + params->sequence); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case RTAS_HARDWARE_ERROR: + ret = -EIO; + break; + case RTAS_INVALID_PARAMETER: + ret = -EINVAL; + break; + case RTAS_IBM_GET_VPD_START_OVER: + ret = -EAGAIN; + break; + case RTAS_IBM_GET_VPD_MORE_DATA: + params->sequence = rets[0]; + fallthrough; + case RTAS_IBM_GET_VPD_COMPLETE: + params->written = rets[1]; + /* + * Kernel or firmware bug, do not continue. + */ + if (WARN(params->written > rtas_work_area_size(work_area), + "possible write beyond end of work area")) + ret = -EFAULT; + else + ret = 0; + break; + default: + ret = -EIO; + pr_err_ratelimited("unexpected ibm,get-vpd status %d\n", fwrc); + break; + } + + params->status = fwrc; + return ret; +} + +/* + * Internal VPD "blob" APIs for accumulating ibm,get-vpd results into + * an immutable buffer to be attached to a file descriptor. + */ +struct vpd_blob { + const char *data; + size_t len; +}; + +static bool vpd_blob_has_data(const struct vpd_blob *blob) +{ + return blob->data && blob->len; +} + +static void vpd_blob_free(const struct vpd_blob *blob) +{ + if (blob) { + kvfree(blob->data); + kfree(blob); + } +} + +/** + * vpd_blob_extend() - Append data to a &struct vpd_blob. + * @blob: The blob to extend. + * @data: The new data to append to @blob. + * @len: The length of @data. + * + * Context: May sleep. + * Return: -ENOMEM on allocation failure, 0 otherwise. + */ +static int vpd_blob_extend(struct vpd_blob *blob, const char *data, size_t len) +{ + const size_t new_len = blob->len + len; + const size_t old_len = blob->len; + const char *old_ptr = blob->data; + char *new_ptr; + + new_ptr = old_ptr ? + kvrealloc(old_ptr, old_len, new_len, GFP_KERNEL_ACCOUNT) : + kvmalloc(len, GFP_KERNEL_ACCOUNT); + + if (!new_ptr) + return -ENOMEM; + + memcpy(&new_ptr[old_len], data, len); + blob->data = new_ptr; + blob->len = new_len; + return 0; +} + +/** + * vpd_blob_generate() - Construct a new &struct vpd_blob. + * @generator: Function that supplies the blob data. + * @arg: Context pointer supplied by caller, passed to @generator. + * + * The @generator callback is invoked until it returns NULL. @arg is + * passed to @generator in its first argument on each call. When + * @generator returns data, it should store the data length in its + * second argument. + * + * Context: May sleep. + * Return: A completely populated &struct vpd_blob, or NULL on error. + */ +static const struct vpd_blob * +vpd_blob_generate(const char * (*generator)(void *, size_t *), void *arg) +{ + struct vpd_blob *blob; + const char *buf; + size_t len; + int err = 0; + + blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT); + if (!blob) + return NULL; + + while (err == 0 && (buf = generator(arg, &len))) + err = vpd_blob_extend(blob, buf, len); + + if (err != 0 || !vpd_blob_has_data(blob)) + goto free_blob; + + return blob; +free_blob: + vpd_blob_free(blob); + return NULL; +} + +/* + * Internal VPD sequence APIs. A VPD sequence is a series of calls to + * ibm,get-vpd for a given location code. The sequence ends when an + * error is encountered or all VPD for the location code has been + * returned. + */ + +/** + * struct vpd_sequence - State for managing a VPD sequence. + * @error: Shall be zero as long as the sequence has not encountered an error, + * -ve errno otherwise. Use vpd_sequence_set_err() to update this. + * @params: Parameter block to pass to rtas_ibm_get_vpd(). + */ +struct vpd_sequence { + int error; + struct rtas_ibm_get_vpd_params params; +}; + +/** + * vpd_sequence_begin() - Begin a VPD retrieval sequence. + * @seq: Uninitialized sequence state. + * @loc_code: Location code that defines the scope of the VPD to return. + * + * Initializes @seq with the resources necessary to carry out a VPD + * sequence. Callers must pass @seq to vpd_sequence_end() regardless + * of whether the sequence succeeds. + * + * Context: May sleep. + */ +static void vpd_sequence_begin(struct vpd_sequence *seq, + const struct papr_location_code *loc_code) +{ + /* + * Use a static data structure for the location code passed to + * RTAS to ensure it's in the RMA and avoid a separate work + * area allocation. Guarded by the function lock. + */ + static struct papr_location_code static_loc_code; + + /* + * We could allocate the work area before acquiring the + * function lock, but that would allow concurrent requests to + * exhaust the limited work area pool for no benefit. So + * allocate the work area under the lock. + */ + mutex_lock(&rtas_ibm_get_vpd_lock); + static_loc_code = *loc_code; + *seq = (struct vpd_sequence) { + .params = { + .work_area = rtas_work_area_alloc(SZ_4K), + .loc_code = &static_loc_code, + .sequence = 1, + }, + }; +} + +/** + * vpd_sequence_end() - Finalize a VPD retrieval sequence. + * @seq: Sequence state. + * + * Releases resources obtained by vpd_sequence_begin(). + */ +static void vpd_sequence_end(struct vpd_sequence *seq) +{ + rtas_work_area_free(seq->params.work_area); + mutex_unlock(&rtas_ibm_get_vpd_lock); +} + +/** + * vpd_sequence_should_stop() - Determine whether a VPD retrieval sequence + * should continue. + * @seq: VPD sequence state. + * + * Examines the sequence error state and outputs of the last call to + * ibm,get-vpd to determine whether the sequence in progress should + * continue or stop. + * + * Return: True if the sequence has encountered an error or if all VPD for + * this sequence has been retrieved. False otherwise. + */ +static bool vpd_sequence_should_stop(const struct vpd_sequence *seq) +{ + bool done; + + if (seq->error) + return true; + + switch (seq->params.status) { + case 0: + if (seq->params.written == 0) + done = false; /* Initial state. */ + else + done = true; /* All data consumed. */ + break; + case 1: + done = false; /* More data available. */ + break; + default: + done = true; /* Error encountered. */ + break; + } + + return done; +} + +static int vpd_sequence_set_err(struct vpd_sequence *seq, int err) +{ + /* Preserve the first error recorded. */ + if (seq->error == 0) + seq->error = err; + + return seq->error; +} + +/* + * Generator function to be passed to vpd_blob_generate(). + */ +static const char *vpd_sequence_fill_work_area(void *arg, size_t *len) +{ + struct vpd_sequence *seq = arg; + struct rtas_ibm_get_vpd_params *p = &seq->params; + + if (vpd_sequence_should_stop(seq)) + return NULL; + if (vpd_sequence_set_err(seq, rtas_ibm_get_vpd(p))) + return NULL; + *len = p->written; + return rtas_work_area_raw_buf(p->work_area); +} + +/* + * Higher-level VPD retrieval code below. These functions use the + * vpd_blob_* and vpd_sequence_* APIs defined above to create fd-based + * VPD handles for consumption by user space. + */ + +/** + * papr_vpd_run_sequence() - Run a single VPD retrieval sequence. + * @loc_code: Location code that defines the scope of VPD to return. + * + * Context: May sleep. Holds a mutex and an RTAS work area for its + * duration. Typically performs multiple sleepable slab + * allocations. + * + * Return: A populated &struct vpd_blob on success. Encoded error + * pointer otherwise. + */ +static const struct vpd_blob *papr_vpd_run_sequence(const struct papr_location_code *loc_code) +{ + const struct vpd_blob *blob; + struct vpd_sequence seq; + + vpd_sequence_begin(&seq, loc_code); + blob = vpd_blob_generate(vpd_sequence_fill_work_area, &seq); + if (!blob) + vpd_sequence_set_err(&seq, -ENOMEM); + vpd_sequence_end(&seq); + + if (seq.error) { + vpd_blob_free(blob); + return ERR_PTR(seq.error); + } + + return blob; +} + +/** + * papr_vpd_retrieve() - Return the VPD for a location code. + * @loc_code: Location code that defines the scope of VPD to return. + * + * Run VPD sequences against @loc_code until a blob is successfully + * instantiated, or a hard error is encountered, or a fatal signal is + * pending. + * + * Context: May sleep. + * Return: A fully populated VPD blob when successful. Encoded error + * pointer otherwise. + */ +static const struct vpd_blob *papr_vpd_retrieve(const struct papr_location_code *loc_code) +{ + const struct vpd_blob *blob; + + /* + * EAGAIN means the sequence errored with a -4 (VPD changed) + * status from ibm,get-vpd, and we should attempt a new + * sequence. PAPR+ v2.13 R1–7.3.20–5 indicates that this + * should be a transient condition, not something that happens + * continuously. But we'll stop trying on a fatal signal. + */ + do { + blob = papr_vpd_run_sequence(loc_code); + if (!IS_ERR(blob)) /* Success. */ + break; + if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */ + break; + pr_info_ratelimited("VPD changed during retrieval, retrying\n"); + cond_resched(); + } while (!fatal_signal_pending(current)); + + return blob; +} + +static ssize_t papr_vpd_handle_read(struct file *file, char __user *buf, size_t size, loff_t *off) +{ + const struct vpd_blob *blob = file->private_data; + + /* bug: we should not instantiate a handle without any data attached. */ + if (!vpd_blob_has_data(blob)) { + pr_err_once("handle without data\n"); + return -EIO; + } + + return simple_read_from_buffer(buf, size, off, blob->data, blob->len); +} + +static int papr_vpd_handle_release(struct inode *inode, struct file *file) +{ + const struct vpd_blob *blob = file->private_data; + + vpd_blob_free(blob); + + return 0; +} + +static loff_t papr_vpd_handle_seek(struct file *file, loff_t off, int whence) +{ + const struct vpd_blob *blob = file->private_data; + + return fixed_size_llseek(file, off, whence, blob->len); +} + + +static const struct file_operations papr_vpd_handle_ops = { + .read = papr_vpd_handle_read, + .llseek = papr_vpd_handle_seek, + .release = papr_vpd_handle_release, +}; + +/** + * papr_vpd_create_handle() - Create a fd-based handle for reading VPD. + * @ulc: Location code in user memory; defines the scope of the VPD to + * retrieve. + * + * Handler for PAPR_VPD_IOC_CREATE_HANDLE ioctl command. Validates + * @ulc and instantiates an immutable VPD "blob" for it. The blob is + * attached to a file descriptor for reading by user space. The memory + * backing the blob is freed when the file is released. + * + * The entire requested VPD is retrieved by this call and all + * necessary RTAS interactions are performed before returning the fd + * to user space. This keeps the read handler simple and ensures that + * the kernel can prevent interleaving of ibm,get-vpd call sequences. + * + * Return: The installed fd number if successful, -ve errno otherwise. + */ +static long papr_vpd_create_handle(struct papr_location_code __user *ulc) +{ + struct papr_location_code klc; + const struct vpd_blob *blob; + struct file *file; + long err; + int fd; + + if (copy_from_user(&klc, ulc, sizeof(klc))) + return -EFAULT; + + if (!string_is_terminated(klc.str, ARRAY_SIZE(klc.str))) + return -EINVAL; + + blob = papr_vpd_retrieve(&klc); + if (IS_ERR(blob)) + return PTR_ERR(blob); + + fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = fd; + goto free_blob; + } + + file = anon_inode_getfile("[papr-vpd]", &papr_vpd_handle_ops, + (void *)blob, O_RDONLY); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto put_fd; + } + + file->f_mode |= FMODE_LSEEK | FMODE_PREAD; + fd_install(fd, file); + return fd; +put_fd: + put_unused_fd(fd); +free_blob: + vpd_blob_free(blob); + return err; +} + +/* + * Top-level ioctl handler for /dev/papr-vpd. + */ +static long papr_vpd_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (__force void __user *)arg; + long ret; + + switch (ioctl) { + case PAPR_VPD_IOC_CREATE_HANDLE: + ret = papr_vpd_create_handle(argp); + break; + default: + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + +static const struct file_operations papr_vpd_ops = { + .unlocked_ioctl = papr_vpd_dev_ioctl, +}; + +static struct miscdevice papr_vpd_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "papr-vpd", + .fops = &papr_vpd_ops, +}; + +static __init int papr_vpd_init(void) +{ + if (!rtas_function_implemented(RTAS_FN_IBM_GET_VPD)) + return -ENODEV; + + return misc_register(&papr_vpd_dev); +} +machine_device_initcall(pseries, papr_vpd_init); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 8376f03f932a..bba4ad192b0f 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -55,6 +55,7 @@ extern int dlpar_detach_node(struct device_node *); extern int dlpar_acquire_drc(u32 drc_index); extern int dlpar_release_drc(u32 drc_index); extern int dlpar_unisolate_drc(u32 drc_index); +extern void post_mobility_fixup(void); void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog); int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog); diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 5c43435472cc..382003dfdb9a 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,6 +13,7 @@ #include <asm/mmu.h> #include <asm/rtas.h> #include <asm/topology.h> +#include "pseries.h" static struct device suspend_dev; diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b1f25bac280b..71d52a670d95 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -385,11 +385,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); - if (migration_in_progress) + if (migration_in_progress) { rc = -EBUSY; - else + } else { rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); + if (!rc) + caps->nr_open_wins_progress++; + } + mutex_unlock(&vas_pseries_mutex); if (rc) goto out; @@ -404,8 +408,17 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, goto out_free; txwin->win_type = cop_feat_caps->win_type; - mutex_lock(&vas_pseries_mutex); + /* + * The migration SUSPEND thread sets migration_in_progress and + * closes all open windows from the list. But the window is + * added to the list after open and modify HCALLs. So possible + * that migration_in_progress is set before modify HCALL which + * may cause some windows are still open when the hypervisor + * initiates the migration. + * So checks the migration_in_progress flag again and close all + * open windows. + * * Possible to lose the acquired credit with DLPAR core * removal after the window is opened. So if there are any * closed windows (means with lost credits), do not give new @@ -413,9 +426,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * after the existing windows are reopened when credits are * available. */ - if (!caps->nr_close_wins) { + mutex_lock(&vas_pseries_mutex); + if (!caps->nr_close_wins && !migration_in_progress) { list_add(&txwin->win_list, &caps->list); caps->nr_open_windows++; + caps->nr_open_wins_progress--; mutex_unlock(&vas_pseries_mutex); vas_user_win_add_mm_context(&txwin->vas_win.task_ref); return &txwin->vas_win; @@ -433,6 +448,12 @@ out_free: */ free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); + /* + * Hold mutex and reduce nr_open_wins_progress counter. + */ + mutex_lock(&vas_pseries_mutex); + caps->nr_open_wins_progress--; + mutex_unlock(&vas_pseries_mutex); out: atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); @@ -937,14 +958,14 @@ int vas_migration_handler(int action) struct vas_caps *vcaps; int i, rc = 0; + pr_info("VAS migration event %d\n", action); + /* * NX-GZIP is not enabled. Nothing to do for migration. */ if (!copypaste_feat) return rc; - mutex_lock(&vas_pseries_mutex); - if (action == VAS_SUSPEND) migration_in_progress = true; else @@ -990,12 +1011,27 @@ int vas_migration_handler(int action) switch (action) { case VAS_SUSPEND: + mutex_lock(&vas_pseries_mutex); rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, true); + /* + * Windows are included in the list after successful + * open. So wait for closing these in-progress open + * windows in vas_allocate_window() which will be + * done if the migration_in_progress is set. + */ + while (vcaps->nr_open_wins_progress) { + mutex_unlock(&vas_pseries_mutex); + msleep(10); + mutex_lock(&vas_pseries_mutex); + } + mutex_unlock(&vas_pseries_mutex); break; case VAS_RESUME: + mutex_lock(&vas_pseries_mutex); atomic_set(&caps->nr_total_credits, new_nr_creds); rc = reconfig_open_windows(vcaps, new_nr_creds, true); + mutex_unlock(&vas_pseries_mutex); break; default: /* should not happen */ @@ -1011,8 +1047,9 @@ int vas_migration_handler(int action) goto out; } + pr_info("VAS migration event (%d) successful\n", action); + out: - mutex_unlock(&vas_pseries_mutex); return rc; } diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 7115043ec488..45567cd13178 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -91,6 +91,8 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_open_wins_progress; /* Number of open windows in */ + /* progress. Used in migration */ int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ int nr_open_windows; /* Number of successful open windows */ u8 feat; /* Feature type */ diff --git a/arch/powerpc/sysdev/grackle.c b/arch/powerpc/sysdev/grackle.c index fd2f94a884f0..7dce8278b71e 100644 --- a/arch/powerpc/sysdev/grackle.c +++ b/arch/powerpc/sysdev/grackle.c @@ -18,24 +18,8 @@ #define GRACKLE_CFA(b, d, o) (0x80 | ((b) << 8) | ((d) << 16) \ | (((o) & ~3) << 24)) -#define GRACKLE_PICR1_STG 0x00000040 #define GRACKLE_PICR1_LOOPSNOOP 0x00000010 -/* N.B. this is called before bridges is initialized, so we can't - use grackle_pcibios_{read,write}_config_dword. */ -static inline void grackle_set_stg(struct pci_controller* bp, int enable) -{ - unsigned int val; - - out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); - val = in_le32(bp->cfg_data); - val = enable? (val | GRACKLE_PICR1_STG) : - (val & ~GRACKLE_PICR1_STG); - out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); - out_le32(bp->cfg_data, val); - (void)in_le32(bp->cfg_data); -} - static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable) { unsigned int val; @@ -56,7 +40,4 @@ void __init setup_grackle(struct pci_controller *hose) pci_add_flags(PCI_REASSIGN_ALL_BUS); if (of_machine_is_compatible("AAPL,PowerBook1998")) grackle_set_loop_snoop(hose, 1); -#if 0 /* Disabled for now, HW problems ??? */ - grackle_set_stg(hose, 1); -#endif } diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index f6ec6dba92dc..700b67476a7d 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -236,6 +236,8 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr, rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation", cpu, hw_id); + if (!rname) + return -ENOMEM; if (!request_mem_region(addr, size, rname)) { pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n", cpu, hw_id); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95a2a06acc6a..cd4c9a204d08 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -685,7 +685,7 @@ config RISCV_BOOT_SPINWAIT If unsure what to do here, say N. config ARCH_SUPPORTS_KEXEC - def_bool MMU + def_bool y config ARCH_SELECTS_KEXEC def_bool y @@ -693,7 +693,7 @@ config ARCH_SELECTS_KEXEC select HOTPLUG_CPU if SMP config ARCH_SUPPORTS_KEXEC_FILE - def_bool 64BIT && MMU + def_bool 64BIT config ARCH_SELECTS_KEXEC_FILE def_bool y @@ -702,9 +702,7 @@ config ARCH_SELECTS_KEXEC_FILE select KEXEC_ELF config ARCH_SUPPORTS_KEXEC_PURGATORY - def_bool KEXEC_FILE - depends on CRYPTO=y - depends on CRYPTO_SHA256=y + def_bool ARCH_SUPPORTS_KEXEC_FILE config ARCH_SUPPORTS_CRASH_DUMP def_bool y diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index 90b261114763..dce96f27cc89 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -8,9 +8,6 @@ #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/leds/common.h> -/* Clock frequency (in Hz) of the rtcclk */ -#define RTCCLK_FREQ 1000000 - / { model = "Microchip PolarFire-SoC Icicle Kit"; compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit", @@ -29,10 +26,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = <RTCCLK_FREQ>; - }; - leds { compatible = "gpio-leds"; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts index 184cb36a175e..a8d623ee9fa4 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts @@ -10,9 +10,6 @@ #include "mpfs.dtsi" #include "mpfs-m100pfs-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Aries Embedded M100PFEVPS"; compatible = "aries,m100pfsevp", "microchip,mpfs"; @@ -33,10 +30,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = <MTIMER_FREQ>; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x40000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index c87cc2d8fe29..ea0808ab1042 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -6,9 +6,6 @@ #include "mpfs.dtsi" #include "mpfs-polarberry-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Sundance PolarBerry"; compatible = "sundance,polarberry", "microchip,mpfs"; @@ -22,10 +19,6 @@ stdout-path = "serial0:115200n8"; }; - cpus { - timebase-frequency = <MTIMER_FREQ>; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x2e000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts index 013cb666c72d..f9a890579438 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts @@ -6,9 +6,6 @@ #include "mpfs.dtsi" #include "mpfs-sev-kit-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { #address-cells = <2>; #size-cells = <2>; @@ -28,10 +25,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = <MTIMER_FREQ>; - }; - reserved-memory { #address-cells = <2>; #size-cells = <2>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts index e0797c7e1b35..d1120f5f2c01 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts @@ -11,9 +11,6 @@ #include "mpfs.dtsi" #include "mpfs-tysom-m-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Aldec TySOM-M-MPFS250T-REV2"; compatible = "aldec,tysom-m-mpfs250t-rev2", "microchip,mpfs"; @@ -34,10 +31,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = <MTIMER_FREQ>; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x30000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index a6faf24f1dba..266489d43912 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -13,6 +13,7 @@ cpus { #address-cells = <1>; #size-cells = <0>; + timebase-frequency = <1000000>; cpu0: cpu@0 { compatible = "sifive,e51", "sifive,rocket0", "riscv"; diff --git a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi index df40e87ee063..aec6401a467b 100644 --- a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi @@ -34,7 +34,6 @@ cpu0_intc: interrupt-controller { compatible = "riscv,cpu-intc"; interrupt-controller; - #address-cells = <0>; #interrupt-cells = <1>; }; }; diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index 197db68cc8da..17a904869724 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -38,29 +38,35 @@ static long ax45mp_iocp_sw_workaround(void) return ret.error ? 0 : ret.value; } -static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) +static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) { + static bool done; + if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO)) - return false; + return; + + if (done) + return; + + done = true; if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID) - return false; + return; if (!ax45mp_iocp_sw_workaround()) - return false; + return; /* Set this just to make core cbo code happy */ riscv_cbom_block_size = 1; riscv_noncoherent_supported(); - - return true; } void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage) { - errata_probe_iocp(stage, archid, impid); + if (stage == RISCV_ALTERNATIVES_BOOT) + errata_probe_iocp(stage, archid, impid); /* we have nothing to patch here ATM so just return back */ } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 294044429e8e..ab00235b018f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -899,7 +899,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define TASK_SIZE 0xffffffffUL -#define VMALLOC_START 0 +#define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE #endif /* !CONFIG_MMU */ diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index 1d7942c8a6cb..eeec04b7dae6 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -46,9 +46,6 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__riscv_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -82,6 +79,4 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); return sys_ni_syscall(); \ } -#define SYS_NI(name) SYSCALL_ALIAS(__riscv_sys_##name, sys_ni_posix_timers); - #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index 55f1d7856b54..8706736fd4e2 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -5,18 +5,20 @@ void arch_crash_save_vmcoreinfo(void) { - VMCOREINFO_NUMBER(VA_BITS); VMCOREINFO_NUMBER(phys_ram_base); vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); +#ifdef CONFIG_MMU + VMCOREINFO_NUMBER(VA_BITS); vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); #ifdef CONFIG_64BIT vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); #endif +#endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", kernel_map.va_kernel_pa_offset); diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index b77397432403..76ace1e0b46f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -154,7 +154,6 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a3 add a3, a3, a1 REG_L sp, (a3) - scs_load_current .Lsecondary_start_common: @@ -165,6 +164,7 @@ secondary_start_sbi: call relocate_enable_mmu #endif call .Lsetup_trap_vector + scs_load_current tail smp_callin #endif /* CONFIG_SMP */ diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 56a8c78e9e21..aac019ed63b1 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -40,15 +40,6 @@ struct relocation_handlers { long buffer); }; -unsigned int initialize_relocation_hashtable(unsigned int num_relocations); -void process_accumulated_relocations(struct module *me); -int add_relocation_to_accumulate(struct module *me, int type, void *location, - unsigned int hashtable_bits, Elf_Addr v); - -struct hlist_head *relocation_hashtable; - -struct list_head used_buckets_list; - /* * The auipc+jalr instruction pair can reach any PC-relative offset * in the range [-2^31 - 2^11, 2^31 - 2^11) @@ -64,7 +55,7 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) static int riscv_insn_rmw(void *location, u32 keep, u32 set) { - u16 *parcel = location; + __le16 *parcel = location; u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; insn &= keep; @@ -77,7 +68,7 @@ static int riscv_insn_rmw(void *location, u32 keep, u32 set) static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set) { - u16 *parcel = location; + __le16 *parcel = location; u16 insn = le16_to_cpu(*parcel); insn &= keep; @@ -604,7 +595,10 @@ static const struct relocation_handlers reloc_handlers[] = { /* 192-255 nonstandard ABI extensions */ }; -void process_accumulated_relocations(struct module *me) +static void +process_accumulated_relocations(struct module *me, + struct hlist_head **relocation_hashtable, + struct list_head *used_buckets_list) { /* * Only ADD/SUB/SET/ULEB128 should end up here. @@ -624,18 +618,25 @@ void process_accumulated_relocations(struct module *me) * - Each relocation entry for a location address */ struct used_bucket *bucket_iter; + struct used_bucket *bucket_iter_tmp; struct relocation_head *rel_head_iter; + struct hlist_node *rel_head_iter_tmp; struct relocation_entry *rel_entry_iter; + struct relocation_entry *rel_entry_iter_tmp; int curr_type; void *location; long buffer; - list_for_each_entry(bucket_iter, &used_buckets_list, head) { - hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) { + list_for_each_entry_safe(bucket_iter, bucket_iter_tmp, + used_buckets_list, head) { + hlist_for_each_entry_safe(rel_head_iter, rel_head_iter_tmp, + bucket_iter->bucket, node) { buffer = 0; location = rel_head_iter->location; - list_for_each_entry(rel_entry_iter, - rel_head_iter->rel_entry, head) { + list_for_each_entry_safe(rel_entry_iter, + rel_entry_iter_tmp, + rel_head_iter->rel_entry, + head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( me, &buffer, rel_entry_iter->value); @@ -648,11 +649,14 @@ void process_accumulated_relocations(struct module *me) kfree(bucket_iter); } - kfree(relocation_hashtable); + kfree(*relocation_hashtable); } -int add_relocation_to_accumulate(struct module *me, int type, void *location, - unsigned int hashtable_bits, Elf_Addr v) +static int add_relocation_to_accumulate(struct module *me, int type, + void *location, + unsigned int hashtable_bits, Elf_Addr v, + struct hlist_head *relocation_hashtable, + struct list_head *used_buckets_list) { struct relocation_entry *entry; struct relocation_head *rel_head; @@ -661,6 +665,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, unsigned long hash; entry = kmalloc(sizeof(*entry), GFP_KERNEL); + + if (!entry) + return -ENOMEM; + INIT_LIST_HEAD(&entry->head); entry->type = type; entry->value = v; @@ -669,7 +677,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, current_head = &relocation_hashtable[hash]; - /* Find matching location (if any) */ + /* + * Search for the relocation_head for the relocations that happen at the + * provided location + */ bool found = false; struct relocation_head *rel_head_iter; @@ -681,19 +692,45 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, } } + /* + * If there has not yet been any relocations at the provided location, + * create a relocation_head for that location and populate it with this + * relocation_entry. + */ if (!found) { rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL); + + if (!rel_head) { + kfree(entry); + return -ENOMEM; + } + rel_head->rel_entry = kmalloc(sizeof(struct list_head), GFP_KERNEL); + + if (!rel_head->rel_entry) { + kfree(entry); + kfree(rel_head); + return -ENOMEM; + } + INIT_LIST_HEAD(rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { bucket = kmalloc(sizeof(struct used_bucket), GFP_KERNEL); + + if (!bucket) { + kfree(entry); + kfree(rel_head); + kfree(rel_head->rel_entry); + return -ENOMEM; + } + INIT_LIST_HEAD(&bucket->head); bucket->bucket = current_head; - list_add(&bucket->head, &used_buckets_list); + list_add(&bucket->head, used_buckets_list); } hlist_add_head(&rel_head->node, current_head); } @@ -704,7 +741,9 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, return 0; } -unsigned int initialize_relocation_hashtable(unsigned int num_relocations) +static unsigned int +initialize_relocation_hashtable(unsigned int num_relocations, + struct hlist_head **relocation_hashtable) { /* Can safely assume that bits is not greater than sizeof(long) */ unsigned long hashtable_size = roundup_pow_of_two(num_relocations); @@ -720,12 +759,13 @@ unsigned int initialize_relocation_hashtable(unsigned int num_relocations) hashtable_size <<= should_double_size; - relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(*relocation_hashtable), - GFP_KERNEL); - __hash_init(relocation_hashtable, hashtable_size); + *relocation_hashtable = kmalloc_array(hashtable_size, + sizeof(*relocation_hashtable), + GFP_KERNEL); + if (!*relocation_hashtable) + return -ENOMEM; - INIT_LIST_HEAD(&used_buckets_list); + __hash_init(*relocation_hashtable, hashtable_size); return hashtable_bits; } @@ -742,7 +782,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Addr v; int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); - unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations); + struct hlist_head *relocation_hashtable; + struct list_head used_buckets_list; + unsigned int hashtable_bits; + + hashtable_bits = initialize_relocation_hashtable(num_relocations, + &relocation_hashtable); + + if (hashtable_bits < 0) + return hashtable_bits; + + INIT_LIST_HEAD(&used_buckets_list); pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -823,14 +873,18 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } if (reloc_handlers[type].accumulate_handler) - res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v); + res = add_relocation_to_accumulate(me, type, location, + hashtable_bits, v, + relocation_hashtable, + &used_buckets_list); else res = handler(me, location, v); if (res) return res; } - process_accumulated_relocations(me); + process_accumulated_relocations(me, &relocation_hashtable, + &used_buckets_list); return 0; } diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index c712037dbe10..a2ca5b7756a5 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -169,7 +169,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, pair->value &= ~missing; } -static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) { struct riscv_hwprobe pair; diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S index 90f22049d553..8515ed7cd8c1 100644 --- a/arch/riscv/kernel/tests/module_test/test_uleb128.S +++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S @@ -6,13 +6,13 @@ .text .global test_uleb_basic test_uleb_basic: - ld a0, second + lw a0, second addi a0, a0, -127 ret .global test_uleb_large test_uleb_large: - ld a0, fourth + lw a0, fourth addi a0, a0, -0x07e8 ret @@ -22,10 +22,10 @@ first: second: .reloc second, R_RISCV_SET_ULEB128, second .reloc second, R_RISCV_SUB_ULEB128, first - .dword 0 + .word 0 third: .space 1000 fourth: .reloc fourth, R_RISCV_SET_ULEB128, fourth .reloc fourth, R_RISCV_SUB_ULEB128, third - .dword 0 + .word 0 diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 5eba37147caa..5255f8134aef 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -550,16 +550,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { len = 8; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) { len = 8; val.data_ulong = GET_RS2C(insn, regs); #endif } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { len = 4; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) { len = 4; val.data_ulong = GET_RS2C(insn, regs); } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) { diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 6cf23b8adb71..e808723a85f1 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -55,6 +55,7 @@ struct imsic { /* IMSIC SW-file */ struct imsic_mrif *swfile; phys_addr_t swfile_pa; + spinlock_t swfile_extirq_lock; }; #define imsic_vs_csr_read(__c) \ @@ -613,12 +614,23 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu) { struct imsic *imsic = vcpu->arch.aia_context.imsic_state; struct imsic_mrif *mrif = imsic->swfile; + unsigned long flags; + + /* + * The critical section is necessary during external interrupt + * updates to avoid the risk of losing interrupts due to potential + * interruptions between reading topei and updating pending status. + */ + + spin_lock_irqsave(&imsic->swfile_extirq_lock, flags); if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) && imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis)) kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT); else kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT); + + spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags); } static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear, @@ -1039,6 +1051,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) } imsic->swfile = page_to_virt(swfile_page); imsic->swfile_pa = page_to_phys(swfile_page); + spin_lock_init(&imsic->swfile_extirq_lock); /* Setup IO device */ kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops); diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 90d4ba36d1d0..081339ddf47e 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -304,6 +304,8 @@ void handle_page_fault(struct pt_regs *regs) goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3bec98d20283..d5d8f99d1f25 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -254,13 +254,13 @@ config ARCH_SUPPORTS_KEXEC def_bool y config ARCH_SUPPORTS_KEXEC_FILE - def_bool CRYPTO && CRYPTO_SHA256 && CRYPTO_SHA256_S390 + def_bool y config ARCH_SUPPORTS_KEXEC_SIG def_bool MODULE_SIG_FORMAT config ARCH_SUPPORTS_KEXEC_PURGATORY - def_bool KEXEC_FILE + def_bool y config ARCH_SUPPORTS_CRASH_DUMP def_bool y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 438cd92e6080..6de44ede4e14 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -44,8 +44,7 @@ CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -76,7 +75,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -93,6 +91,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLUB_STATS=y # CONFIG_COMPAT_BRK is not set @@ -619,6 +618,9 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_BTRFS_DEBUG=y CONFIG_BTRFS_ASSERT=y CONFIG_NILFS2_FS=m +CONFIG_BCACHEFS_FS=y +CONFIG_BCACHEFS_QUOTA=y +CONFIG_BCACHEFS_POSIX_ACL=y CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y @@ -691,7 +693,6 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y @@ -834,7 +835,6 @@ CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 1b8150e50f6a..bcae47da6b7c 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -42,8 +42,7 @@ CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -71,7 +70,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -88,6 +86,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y # CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y @@ -605,6 +604,9 @@ CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m +CONFIG_BCACHEFS_FS=m +CONFIG_BCACHEFS_QUOTA=y +CONFIG_BCACHEFS_POSIX_ACL=y CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y @@ -677,7 +679,6 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index b831083b4edd..47028450eee1 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -9,8 +9,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_CRASH_DUMP=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index b714ed0ef688..9acf48e53a87 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -79,7 +79,7 @@ static inline int test_fp_ctl(u32 fpc) #define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) #define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH) -#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7) +#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW) struct kernel_fpu; diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 9286430fe729..35c1d1b860d8 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -63,10 +63,6 @@ cond_syscall(__s390x_sys_##name); \ cond_syscall(__s390_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers) - #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ long __s390_compat_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ @@ -85,15 +81,11 @@ /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ cond_syscall(__s390_compat_sys_##name) -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers) - #define __S390_SYS_STUBx(x, name, ...) \ long __s390_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ @@ -124,9 +116,6 @@ #define COND_SYSCALL(name) \ cond_syscall(__s390x_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers) - #define __S390_SYS_STUBx(x, fullname, name, ...) #endif /* CONFIG_COMPAT */ diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 86fec9b080f6..5f5cd20ebb34 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -459,3 +459,5 @@ 454 common futex_wake sys_futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue sys_futex_requeue +457 common statmount sys_statmount sys_statmount +458 common listmount sys_listmount sys_listmount diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 02dcbe82a8e5..8207a892bbe2 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -587,10 +587,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, if (!gmap_is_shadow(gmap)) return; - if (start >= 1UL << 31) - /* We are only interested in prefix pages */ - return; - /* * Only new shadow blocks are added to the list during runtime, * therefore we can safely reference them all the time. diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 249aefcf7c4e..ab4098886e56 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -337,6 +337,9 @@ static void do_exception(struct pt_regs *regs, int access) return; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; + /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { if (!user_mode(regs)) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3bd2ab2a9a34..5cb92941540b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -756,7 +756,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } diff --git a/arch/sh/include/asm/kexec.h b/arch/sh/include/asm/kexec.h index 927d80ba2332..76631714673c 100644 --- a/arch/sh/include/asm/kexec.h +++ b/arch/sh/include/asm/kexec.h @@ -28,7 +28,7 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_SH -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* arch/sh/kernel/machine_kexec.c */ void reserve_crashkernel(void); @@ -67,6 +67,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } #else static inline void reserve_crashkernel(void) { } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* __ASM_SH_KEXEC_H */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 69cd9ac4b2ab..2d7e70537de0 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_IO_TRAPPED) += io_trapped.o diff --git a/arch/sh/kernel/reboot.c b/arch/sh/kernel/reboot.c index e8eeedc9b182..1de006b1c339 100644 --- a/arch/sh/kernel/reboot.c +++ b/arch/sh/kernel/reboot.c @@ -63,7 +63,7 @@ struct machine_ops machine_ops = { .shutdown = native_machine_shutdown, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -88,7 +88,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 3d80515298d2..d3175f09b3aa 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -220,7 +220,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, request_resource(res, &code_resource); request_resource(res, &data_resource); request_resource(res, &bss_resource); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE request_resource(res, &crashk_res); #endif diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 363fae0fe9bf..3103ebd2e4cb 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -459,3 +459,5 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 7bcaa3d5ea44..ba147d7ad19a 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -502,3 +502,5 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3762f41bb092..1566748f16c4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2072,7 +2072,7 @@ config ARCH_SUPPORTS_KEXEC def_bool y config ARCH_SUPPORTS_KEXEC_FILE - def_bool X86_64 && CRYPTO && CRYPTO_SHA256 + def_bool X86_64 config ARCH_SELECTS_KEXEC_FILE def_bool y @@ -2080,7 +2080,7 @@ config ARCH_SELECTS_KEXEC_FILE select HAVE_IMA_KEXEC if IMA config ARCH_SUPPORTS_KEXEC_PURGATORY - def_bool KEXEC_FILE + def_bool y config ARCH_SUPPORTS_KEXEC_SIG def_bool y diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 55c98fdd67d2..18d15d1ce87d 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -178,7 +178,7 @@ static unsigned long get_cmdline_acpi_rsdp(void) { unsigned long addr = 0; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE char val[MAX_ADDR_LEN] = { }; int ret; diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 1b5d17a9f70d..cf1f13c82175 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -10,6 +10,7 @@ #include <asm/coco.h> #include <asm/tdx.h> #include <asm/vmx.h> +#include <asm/ia32.h> #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/pgtable.h> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index d813160b14d8..6356060caaf3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include <xen/events.h> #endif +#include <asm/apic.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/vdso.h> @@ -167,7 +168,96 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } } -/* Handles int $0x80 */ +#ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + +/** + * int80_emulation - 32-bit legacy syscall entry + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +DEFINE_IDTENTRY_RAW(int80_emulation) +{ + int nr; + + /* Kernel does not use INT $0x80! */ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { int nr = syscall_32_enter(regs); @@ -186,6 +276,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) instrumentation_end(); syscall_exit_to_user_mode(regs); } +#endif /* !CONFIG_IA32_EMULATION */ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 27c05d08558a..de94e2e84ecc 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -275,80 +275,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) - -/* - * 32-bit legacy system call entry. - * - * 32-bit x86 Linux system calls traditionally used the INT $0x80 - * instruction. INT $0x80 lands here. - * - * This entry point can be used by 32-bit and 64-bit programs to perform - * 32-bit system calls. Instances of INT $0x80 can be found inline in - * various programs and libraries. It is also used by the vDSO's - * __kernel_vsyscall fallback for hardware that doesn't support a faster - * entry method. Restarted 32-bit system calls also fall back to INT - * $0x80 regardless of what instruction was originally used to do the - * system call. - * - * This is considered a slow path. It is not used by most libc - * implementations on modern hardware except during process startup. - * - * Arguments: - * eax system call number - * ebx arg1 - * ecx arg2 - * edx arg3 - * esi arg4 - * edi arg5 - * ebp arg6 - */ -SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_ENTRY - ENDBR - /* - * Interrupts are off on entry. - */ - ASM_CLAC /* Do this early to minimize exposure */ - ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV - - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - - /* switch to thread stack expects orig_ax and rdi to be pushed */ - pushq %rax /* pt_regs->orig_ax */ - - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rax - - /* In the Xen PV case we already run on the thread stack. */ - ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - - movq %rsp, %rax - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp - - pushq 5*8(%rax) /* regs->ss */ - pushq 4*8(%rax) /* regs->rsp */ - pushq 3*8(%rax) /* regs->eflags */ - pushq 2*8(%rax) /* regs->cs */ - pushq 1*8(%rax) /* regs->ip */ - pushq 0*8(%rax) /* regs->orig_ax */ -.Lint80_keep_stack: - - PUSH_AND_CLEAR_REGS rax=$-ENOSYS - UNWIND_HINT_REGS - - cld - - IBRS_ENTER - UNTRAIN_RET - - movq %rsp, %rdi - call do_int80_syscall_32 - jmp swapgs_restore_regs_and_return_to_usermode -SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index c8fac5205803..56e6c2f3ee9c 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -461,3 +461,5 @@ 454 i386 futex_wake sys_futex_wake 455 i386 futex_wait sys_futex_wait 456 i386 futex_requeue sys_futex_requeue +457 i386 statmount sys_statmount +458 i386 listmount sys_listmount diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 8cb8bf68721c..3a22eef585c2 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -378,6 +378,8 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ce1c777227b4..0f2786d4e405 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4051,12 +4051,17 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable; int global_ctrl, pebs_enable; + /* + * In addition to obeying exclude_guest/exclude_host, remove bits being + * used for PEBS when running a guest, because PEBS writes to virtual + * addresses (not physical addresses). + */ *nr = 0; global_ctrl = (*nr)++; arr[global_ctrl] = (struct perf_guest_switch_msr){ .msr = MSR_CORE_PERF_GLOBAL_CTRL, .host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask, - .guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask), + .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask, }; if (!x86_pmu.pebs) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 65f79092c9d9..fcd20c6dc7f9 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -10,6 +10,9 @@ #define ALT_FLAG_NOT (1 << 0) #define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature)) +#define ALT_FLAG_DIRECT_CALL (1 << 1) +#define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature)) +#define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS) #ifndef __ASSEMBLY__ @@ -86,6 +89,8 @@ struct alt_instr { u8 replacementlen; /* length of new instruction */ } __packed; +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + /* * Debug flag that can be tested to see whether alternative * instructions were patched in already: @@ -101,11 +106,10 @@ extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, s32 *start_cfi, s32 *end_cfi); struct module; -struct paravirt_patch_site; struct callthunk_sites { s32 *call_start, *call_end; - struct paravirt_patch_site *pv_start, *pv_end; + struct alt_instr *alt_start, *alt_end; }; #ifdef CONFIG_CALL_THUNKS @@ -150,6 +154,8 @@ static inline int alternatives_text_reserved(void *start, void *end) } #endif /* CONFIG_SMP */ +#define ALT_CALL_INSTR "call BUG_func" + #define b_replacement(num) "664"#num #define e_replacement(num) "665"#num @@ -330,6 +336,22 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr +/* Macro for creating assembler functions avoiding any C magic. */ +#define DEFINE_ASM_FUNC(func, instr, sec) \ + asm (".pushsection " #sec ", \"ax\"\n" \ + ".global " #func "\n\t" \ + ".type " #func ", @function\n\t" \ + ASM_FUNC_ALIGN "\n" \ + #func ":\n\t" \ + ASM_ENDBR \ + instr "\n\t" \ + ASM_RET \ + ".size " #func ", . - " #func "\n\t" \ + ".popsection") + +void BUG_func(void); +void nop_func(void); + #else /* __ASSEMBLY__ */ #ifdef CONFIG_SMP @@ -370,6 +392,10 @@ static inline int alternatives_text_reserved(void *start, void *end) .byte \alt_len .endm +.macro ALT_CALL_INSTR + call BUG_func +.endm + /* * Define an alternative between two instructions. If @feature is * present, early code in apply_alternatives() replaces @oldinstr with diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 35389b2af88e..0216f63a366b 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -81,22 +81,4 @@ do { \ #include <asm-generic/barrier.h> -/* - * Make previous memory operations globally visible before - * a WRMSR. - * - * MFENCE makes writes visible, but only affects load/store - * instructions. WRMSR is unfortunately not a load/store - * instruction and is unaffected by MFENCE. The LFENCE ensures - * that the WRMSR is not reordered. - * - * Most WRMSRs are full serializing instructions themselves and - * do not require this barrier. This is only required for the - * IA32_TSC_DEADLINE and X2APIC MSRs. - */ -static inline void weak_wrmsr_fence(void) -{ - asm volatile("mfence; lfence" : : : "memory"); -} - #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4af140cf5719..632c26cdeeda 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,7 +218,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ @@ -308,10 +308,14 @@ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ #define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ - #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index a0234dfd1031..1e16bd5ac781 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -150,7 +150,7 @@ do { \ ((x)->e_machine == EM_X86_64) #define compat_elf_check_arch(x) \ - ((elf_check_arch_ia32(x) && ia32_enabled()) || \ + ((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \ (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) static inline void elf_common_init(struct thread_struct *t, diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 5a2ae24b1204..c7ef6ea2fa99 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -2,7 +2,6 @@ #ifndef _ASM_X86_IA32_H #define _ASM_X86_IA32_H - #ifdef CONFIG_IA32_EMULATION #include <linux/compat.h> @@ -75,6 +74,11 @@ static inline bool ia32_enabled(void) return __ia32_enabled; } +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + #else /* !CONFIG_IA32_EMULATION */ static inline bool ia32_enabled(void) @@ -82,6 +86,18 @@ static inline bool ia32_enabled(void) return IS_ENABLED(CONFIG_X86_32); } +static inline void ia32_disable(void) {} + #endif +static inline bool ia32_enabled_verbose(void) +{ + bool enabled = ia32_enabled(); + + if (IS_ENABLED(CONFIG_IA32_EMULATION) && !enabled) + pr_notice_once("32-bit emulation disabled. You can reenable with ia32_emulation=on\n"); + + return enabled; +} + #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 05fd175cec7d..13639e57e1f8 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + #ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_64 DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6de6e1d95952..de3118305838 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -311,6 +311,7 @@ enum smca_bank_types { SMCA_PIE, /* Power, Interrupts, etc. */ SMCA_UMC, /* Unified Memory Controller */ SMCA_UMC_V2, + SMCA_MA_LLC, /* Memory Attached Last Level Cache */ SMCA_PB, /* Parameter Block */ SMCA_PSP, /* Platform Security Processor */ SMCA_PSP_V2, @@ -326,6 +327,8 @@ enum smca_bank_types { SMCA_SHUB, /* System HUB Unit */ SMCA_SATA, /* SATA Unit */ SMCA_USB, /* USB Unit */ + SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */ + SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */ SMCA_GMI_PCS, /* GMI PCS Unit */ SMCA_XGMI_PHY, /* xGMI PHY Unit */ SMCA_WAFL_PHY, /* WAFL PHY Unit */ @@ -333,7 +336,6 @@ enum smca_bank_types { N_SMCA_BANK_TYPES }; -extern const char *smca_get_long_name(enum smca_bank_types t); extern bool amd_mce_is_memory_error(struct mce *m); extern int mce_threshold_create_device(unsigned int cpu); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6c8ff12140ae..8bcf7584e7dd 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -142,8 +142,7 @@ static inline void write_cr0(unsigned long x) static __always_inline unsigned long read_cr2(void) { return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, - "mov %%cr2, %%rax;", - ALT_NOT(X86_FEATURE_XENPV)); + "mov %%cr2, %%rax;", ALT_NOT_XEN); } static __always_inline void write_cr2(unsigned long x) @@ -154,13 +153,12 @@ static __always_inline void write_cr2(unsigned long x) static inline unsigned long __read_cr3(void) { return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3, - "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV)); + "mov %%cr3, %%rax;", ALT_NOT_XEN); } static inline void write_cr3(unsigned long x) { - PVOP_ALT_VCALL1(mmu.write_cr3, x, - "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV)); + PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN); } static inline void __write_cr4(unsigned long x) @@ -182,7 +180,7 @@ extern noinstr void pv_native_wbinvd(void); static __always_inline void wbinvd(void) { - PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV)); + PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN); } static inline u64 paravirt_read_msr(unsigned msr) @@ -390,27 +388,25 @@ static inline void paravirt_release_p4d(unsigned long pfn) static inline pte_t __pte(pteval_t val) { return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val, - "mov %%rdi, %%rax", - ALT_NOT(X86_FEATURE_XENPV)) }; + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pteval_t pte_val(pte_t pte) { return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte, - "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline pgd_t __pgd(pgdval_t val) { return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val, - "mov %%rdi, %%rax", - ALT_NOT(X86_FEATURE_XENPV)) }; + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pgdval_t pgd_val(pgd_t pgd) { return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd, - "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); + "mov %%rdi, %%rax", ALT_NOT_XEN); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION @@ -444,14 +440,13 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) static inline pmd_t __pmd(pmdval_t val) { return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val, - "mov %%rdi, %%rax", - ALT_NOT(X86_FEATURE_XENPV)) }; + "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pmdval_t pmd_val(pmd_t pmd) { return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd, - "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void set_pud(pud_t *pudp, pud_t pud) @@ -464,7 +459,7 @@ static inline pud_t __pud(pudval_t val) pudval_t ret; ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val, - "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); + "mov %%rdi, %%rax", ALT_NOT_XEN); return (pud_t) { ret }; } @@ -472,7 +467,7 @@ static inline pud_t __pud(pudval_t val) static inline pudval_t pud_val(pud_t pud) { return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud, - "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void pud_clear(pud_t *pudp) @@ -492,8 +487,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) static inline p4d_t __p4d(p4dval_t val) { p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val, - "mov %%rdi, %%rax", - ALT_NOT(X86_FEATURE_XENPV)); + "mov %%rdi, %%rax", ALT_NOT_XEN); return (p4d_t) { ret }; } @@ -501,7 +495,7 @@ static inline p4d_t __p4d(p4dval_t val) static inline p4dval_t p4d_val(p4d_t p4d) { return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d, - "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); + "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) @@ -687,17 +681,17 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); static __always_inline unsigned long arch_local_save_flags(void) { return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;", - ALT_NOT(X86_FEATURE_XENPV)); + ALT_NOT_XEN); } static __always_inline void arch_local_irq_disable(void) { - PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV)); + PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT_XEN); } static __always_inline void arch_local_irq_enable(void) { - PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV)); + PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT_XEN); } static __always_inline unsigned long arch_local_irq_save(void) @@ -726,52 +720,25 @@ static __always_inline unsigned long arch_local_irq_save(void) #undef PVOP_VCALL4 #undef PVOP_CALL4 -#define DEFINE_PARAVIRT_ASM(func, instr, sec) \ - asm (".pushsection " #sec ", \"ax\"\n" \ - ".global " #func "\n\t" \ - ".type " #func ", @function\n\t" \ - ASM_FUNC_ALIGN "\n" \ - #func ":\n\t" \ - ASM_ENDBR \ - instr "\n\t" \ - ASM_RET \ - ".size " #func ", . - " #func "\n\t" \ - ".popsection") - extern void default_banner(void); void native_pv_lock_init(void) __init; #else /* __ASSEMBLY__ */ -#define _PVSITE(ptype, ops, word, algn) \ -771:; \ - ops; \ -772:; \ - .pushsection .parainstructions,"a"; \ - .align algn; \ - word 771b; \ - .byte ptype; \ - .byte 772b-771b; \ - _ASM_ALIGN; \ - .popsection - - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL +#ifdef CONFIG_DEBUG_ENTRY -#define PARA_PATCH(off) ((off) / 8) -#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) -#ifdef CONFIG_DEBUG_ENTRY .macro PARA_IRQ_save_fl - PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), - ANNOTATE_RETPOLINE_SAFE; - call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);) + ANNOTATE_RETPOLINE_SAFE; + call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); .endm -#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \ - ALT_NOT(X86_FEATURE_XENPV) +#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;", \ + "ALT_CALL_INSTR;", ALT_CALL_ALWAYS, \ + "pushf; pop %rax;", ALT_NOT_XEN #endif #endif /* CONFIG_PARAVIRT_XXL */ #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 772d03487520..d8e85d2cf8d5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -2,15 +2,6 @@ #ifndef _ASM_X86_PARAVIRT_TYPES_H #define _ASM_X86_PARAVIRT_TYPES_H -#ifndef __ASSEMBLY__ -/* These all sit in the .parainstructions section to tell us what to patch. */ -struct paravirt_patch_site { - u8 *instr; /* original instructions */ - u8 type; /* type of this instruction */ - u8 len; /* length of original instruction */ -}; -#endif - #ifdef CONFIG_PARAVIRT #ifndef __ASSEMBLY__ @@ -250,43 +241,11 @@ struct paravirt_patch_template { extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; -#define PARAVIRT_PATCH(x) \ - (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) - -#define paravirt_type(op) \ - [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "m" (pv_ops.op) -/* - * Generate some code, and mark it as patchable by the - * apply_paravirt() alternate instruction patcher. - */ -#define _paravirt_alt(insn_string, type) \ - "771:\n\t" insn_string "\n" "772:\n" \ - ".pushsection .parainstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR " 771b\n" \ - " .byte " type "\n" \ - " .byte 772b-771b\n" \ - _ASM_ALIGN "\n" \ - ".popsection\n" - -/* Generate patchable code, with the default asm parameters. */ -#define paravirt_alt(insn_string) \ - _paravirt_alt(insn_string, "%c[paravirt_typenum]") - -/* Simple instruction patching code. */ -#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" - -unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len); +#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op) int paravirt_disable_iospace(void); -/* - * This generates an indirect call based on the operation type number. - * The type number, computed in PARAVIRT_PATCH, is derived from the - * offset into the paravirt_patch_template structure, and can therefore be - * freely converted back into a structure offset. - */ +/* This generates an indirect call based on the operation type number. */ #define PARAVIRT_CALL \ ANNOTATE_RETPOLINE_SAFE \ "call *%[paravirt_opptr];" @@ -319,12 +278,6 @@ int paravirt_disable_iospace(void); * However, x86_64 also has to clobber all caller saved registers, which * unfortunately, are quite a bit (r8 - r11) * - * The call instruction itself is marked by placing its start address - * and size into the .parainstructions section, so that - * apply_paravirt() in arch/i386/kernel/alternative.c can do the - * appropriate patching under the control of the backend pv_init_ops - * implementation. - * * Unfortunately there's no way to get gcc to generate the args setup * for the call, and then allow the call itself to be generated by an * inline asm. Because of this, we must do the complete arg setup and @@ -423,14 +376,27 @@ int paravirt_disable_iospace(void); __mask & __eax; \ }) - +/* + * Use alternative patching for paravirt calls: + * - For replacing an indirect call with a direct one, use the "normal" + * ALTERNATIVE() macro with the indirect call as the initial code sequence, + * which will be replaced with the related direct call by using the + * ALT_FLAG_DIRECT_CALL special case and the "always on" feature. + * - In case the replacement is either a direct call or a short code sequence + * depending on a feature bit, the ALTERNATIVE_2() macro is being used. + * The indirect call is the initial code sequence again, while the special + * code sequence is selected with the specified feature bit. In case the + * feature is not active, the direct call is used as above via the + * ALT_FLAG_DIRECT_CALL special case and the "always on" feature. + */ #define ____PVOP_CALL(ret, op, call_clbr, extra_clbr, ...) \ ({ \ PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ - asm volatile(paravirt_alt(PARAVIRT_CALL) \ + asm volatile(ALTERNATIVE(PARAVIRT_CALL, ALT_CALL_INSTR, \ + ALT_CALL_ALWAYS) \ : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ + : paravirt_ptr(op), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ ret; \ @@ -441,10 +407,11 @@ int paravirt_disable_iospace(void); ({ \ PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ - asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL), \ - alt, cond) \ + asm volatile(ALTERNATIVE_2(PARAVIRT_CALL, \ + ALT_CALL_INSTR, ALT_CALL_ALWAYS, \ + alt, cond) \ : call_clbr, ASM_CALL_CONSTRAINT \ - : paravirt_type(op), \ + : paravirt_ptr(op), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ ret; \ @@ -542,8 +509,6 @@ int paravirt_disable_iospace(void); __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -void _paravirt_nop(void); -void paravirt_BUG(void); unsigned long paravirt_ret0(void); #ifdef CONFIG_PARAVIRT_XXL u64 _paravirt_ident_64(u64); @@ -553,11 +518,11 @@ void pv_native_irq_enable(void); unsigned long pv_native_read_cr2(void); #endif -#define paravirt_nop ((void *)_paravirt_nop) - -extern struct paravirt_patch_site __parainstructions[], - __parainstructions_end[]; +#define paravirt_nop ((void *)nop_func) #endif /* __ASSEMBLY__ */ + +#define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV) + #endif /* CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_TYPES_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ae81a7191c1c..26620d7642a9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -749,4 +749,22 @@ enum mds_mitigations { extern bool gds_ucode_mitigated(void); +/* + * Make previous memory operations globally visible before + * a WRMSR. + * + * MFENCE makes writes visible, but only affects load/store + * instructions. WRMSR is unfortunately not a load/store + * instruction and is unaffected by MFENCE. The LFENCE ensures + * that the WRMSR is not reordered. + * + * Most WRMSRs are full serializing instructions themselves and + * do not require this barrier. This is only required for the + * IA32_TSC_DEADLINE and X2APIC MSRs. + */ +static inline void weak_wrmsr_fence(void) +{ + alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE)); +} + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 4d84122bd643..484f4f0131a5 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void); void entry_SYSCALL_compat_safe_stack(void); void entry_SYSRETL_compat_unsafe_stack(void); void entry_SYSRETL_compat_end(void); -void entry_INT80_compat(void); -#ifdef CONFIG_XEN_PV -void xen_entry_INT80_compat(void); -#endif #else /* !CONFIG_IA32_EMULATION */ #define entry_SYSCALL_compat NULL #define entry_SYSENTER_compat NULL diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 85b6e3609cb9..ef9697f20129 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -56,8 +56,8 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); "pop %rdx\n\t" \ FRAME_END -DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock, - PV_UNLOCK_ASM, .spinlock.text); +DEFINE_ASM_FUNC(__raw_callee_save___pv_queued_spin_unlock, + PV_UNLOCK_ASM, .spinlock.text); #else /* CONFIG_64BIT */ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index fd2669b1cb2d..21f9407be5d3 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -86,9 +86,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); return sys_ni_syscall(); \ } -#define __SYS_NI(abi, name) \ - SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers); - #ifdef CONFIG_X86_64 #define __X64_SYS_STUB0(name) \ __SYS_STUB0(x64, sys_##name) @@ -100,13 +97,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X64_COND_SYSCALL(name) \ __COND_SYSCALL(x64, sys_##name) -#define __X64_SYS_NI(name) \ - __SYS_NI(x64, sys_##name) #else /* CONFIG_X86_64 */ #define __X64_SYS_STUB0(name) #define __X64_SYS_STUBx(x, name, ...) #define __X64_COND_SYSCALL(name) -#define __X64_SYS_NI(name) #endif /* CONFIG_X86_64 */ #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) @@ -120,13 +114,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __IA32_COND_SYSCALL(name) \ __COND_SYSCALL(ia32, sys_##name) -#define __IA32_SYS_NI(name) \ - __SYS_NI(ia32, sys_##name) #else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ #define __IA32_SYS_STUB0(name) #define __IA32_SYS_STUBx(x, name, ...) #define __IA32_COND_SYSCALL(name) -#define __IA32_SYS_NI(name) #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ #ifdef CONFIG_IA32_EMULATION @@ -135,8 +126,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the * ia32 regs in the proper order for shared or "common" syscalls. As some * syscalls may not be implemented, we need to expand COND_SYSCALL in - * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this - * case as well. + * kernel/sys_ni.c to cover this case as well. */ #define __IA32_COMPAT_SYS_STUB0(name) \ __SYS_STUB0(ia32, compat_sys_##name) @@ -148,14 +138,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __IA32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(ia32, compat_sys_##name) -#define __IA32_COMPAT_SYS_NI(name) \ - __SYS_NI(ia32, compat_sys_##name) - #else /* CONFIG_IA32_EMULATION */ #define __IA32_COMPAT_SYS_STUB0(name) #define __IA32_COMPAT_SYS_STUBx(x, name, ...) #define __IA32_COMPAT_COND_SYSCALL(name) -#define __IA32_COMPAT_SYS_NI(name) #endif /* CONFIG_IA32_EMULATION */ @@ -175,13 +161,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(x64, compat_sys_##name) -#define __X32_COMPAT_SYS_NI(name) \ - __SYS_NI(x64, compat_sys_##name) #else /* CONFIG_X86_X32_ABI */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) -#define __X32_COMPAT_SYS_NI(name) #endif /* CONFIG_X86_X32_ABI */ @@ -212,17 +195,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ __IA32_COMPAT_COND_SYSCALL(name) \ __X32_COMPAT_COND_SYSCALL(name) -#define COMPAT_SYS_NI(name) \ - __IA32_COMPAT_SYS_NI(name) \ - __X32_COMPAT_SYS_NI(name) - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -243,8 +221,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not * hurt, we only need to re-define it here to keep the naming congruent to - * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() - * macros to work correctly. + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro + * to work correctly. */ #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ @@ -257,10 +235,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); __X64_COND_SYSCALL(name) \ __IA32_COND_SYSCALL(name) -#define SYS_NI(name) \ - __X64_SYS_NI(name) \ - __IA32_SYS_NI(name) - /* * For VSYSCALLS, we need to declare these three syscalls with the new diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 29832c338cdc..0b70653a98c1 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -6,18 +6,6 @@ #include <linux/stddef.h> #include <asm/ptrace.h> -struct paravirt_patch_site; -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end); -#else -static inline void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{} -#define __parainstructions NULL -#define __parainstructions_end NULL -#endif - /* * Currently, the max observed size in the kernel code is * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5. diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1a0dd80d81ac..85a3ce2a3666 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -293,6 +293,7 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); + has_lapic_cpus = true; return 0; } @@ -1134,7 +1135,6 @@ static int __init acpi_parse_madt_lapic_entries(void) if (!count) { count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic, MAX_LOCAL_APIC); - has_lapic_cpus = count > 0; x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, acpi_parse_x2apic, MAX_LOCAL_APIC); } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 73be3931e4f0..95e21596e2f9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -160,7 +160,6 @@ extern s32 __retpoline_sites[], __retpoline_sites_end[]; extern s32 __return_sites[], __return_sites_end[]; extern s32 __cfi_sites[], __cfi_sites_end[]; extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -255,6 +254,16 @@ static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) } } +static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len) +{ + unsigned long flags; + + local_irq_save(flags); + optimize_nops(instr, len); + sync_core(); + local_irq_restore(flags); +} + /* * In this context, "source" is where the instructions are placed in the * section .altinstr_replacement, for example during kernel build by the @@ -385,6 +394,63 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) } } +/* Low-level backend functions usable from alternative code replacements. */ +DEFINE_ASM_FUNC(nop_func, "", .entry.text); +EXPORT_SYMBOL_GPL(nop_func); + +noinstr void BUG_func(void) +{ + BUG(); +} +EXPORT_SYMBOL_GPL(BUG_func); + +#define CALL_RIP_REL_OPCODE 0xff +#define CALL_RIP_REL_MODRM 0x15 + +/* + * Rewrite the "call BUG_func" replacement to point to the target of the + * indirect pv_ops call "call *disp(%ip)". + */ +static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) +{ + void *target, *bug = &BUG_func; + s32 disp; + + if (a->replacementlen != 5 || insn_buff[0] != CALL_INSN_OPCODE) { + pr_err("ALT_FLAG_DIRECT_CALL set for a non-call replacement instruction\n"); + BUG(); + } + + if (a->instrlen != 6 || + instr[0] != CALL_RIP_REL_OPCODE || + instr[1] != CALL_RIP_REL_MODRM) { + pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n"); + BUG(); + } + + /* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */ + disp = *(s32 *)(instr + 2); +#ifdef CONFIG_X86_64 + /* ff 15 00 00 00 00 call *0x0(%rip) */ + /* target address is stored at "next instruction + disp". */ + target = *(void **)(instr + a->instrlen + disp); +#else + /* ff 15 00 00 00 00 call *0x0 */ + /* target address is stored at disp. */ + target = *(void **)disp; +#endif + if (!target) + target = bug; + + /* (BUG_func - .) + (target - BUG_func) := target - . */ + *(s32 *)(insn_buff + 1) += target - bug; + + if (target == &nop_func) + return 0; + + return 5; +} + /* * Replace instructions with better alternatives for this CPU type. This runs * before SMP is initialized to avoid SMP problems with self modifying code. @@ -438,20 +504,25 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. */ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - optimize_nops(instr, a->instrlen); + optimize_nops_inplace(instr, a->instrlen); continue; } - DPRINTK(ALT, "feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)", - (a->flags & ALT_FLAG_NOT) ? "!" : "", + DPRINTK(ALT, "feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d) flags: 0x%x", a->cpuid >> 5, a->cpuid & 0x1f, instr, instr, a->instrlen, - replacement, a->replacementlen); + replacement, a->replacementlen, a->flags); memcpy(insn_buff, replacement, a->replacementlen); insn_buff_sz = a->replacementlen; + if (a->flags & ALT_FLAG_DIRECT_CALL) { + insn_buff_sz = alt_replace_call(instr, insn_buff, a); + if (insn_buff_sz < 0) + continue; + } + for (; insn_buff_sz < a->instrlen; insn_buff_sz++) insn_buff[insn_buff_sz] = 0x90; @@ -1411,46 +1482,6 @@ int alternatives_text_reserved(void *start, void *end) } #endif /* CONFIG_SMP */ -#ifdef CONFIG_PARAVIRT - -/* Use this to add nops to a buffer, then text_poke the whole buffer. */ -static void __init_or_module add_nops(void *insns, unsigned int len) -{ - while (len > 0) { - unsigned int noplen = len; - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - memcpy(insns, x86_nops[noplen], noplen); - insns += noplen; - len -= noplen; - } -} - -void __init_or_module apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{ - struct paravirt_patch_site *p; - char insn_buff[MAX_PATCH_LEN]; - - for (p = start; p < end; p++) { - unsigned int used; - - BUG_ON(p->len > MAX_PATCH_LEN); - /* prep the buffer with the original instructions */ - memcpy(insn_buff, p->instr, p->len); - used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len); - - BUG_ON(used > p->len); - - /* Pad the rest with nops */ - add_nops(insn_buff + used, p->len - used); - text_poke_early(p->instr, insn_buff, p->len); - } -} -extern struct paravirt_patch_site __start_parainstructions[], - __stop_parainstructions[]; -#endif /* CONFIG_PARAVIRT */ - /* * Self-test for the INT3 based CALL emulation code. * @@ -1586,28 +1617,11 @@ void __init alternative_instructions(void) */ /* - * Paravirt patching and alternative patching can be combined to - * replace a function call with a short direct code sequence (e.g. - * by setting a constant return value instead of doing that in an - * external function). - * In order to make this work the following sequence is required: - * 1. set (artificial) features depending on used paravirt - * functions which can later influence alternative patching - * 2. apply paravirt patching (generally replacing an indirect - * function call with a direct one) - * 3. apply alternative patching (e.g. replacing a direct function - * call with a custom code sequence) - * Doing paravirt patching after alternative patching would clobber - * the optimization of the custom code with a function call again. + * Make sure to set (artificial) features depending on used paravirt + * functions which can later influence alternative patching. */ paravirt_set_cap(); - /* - * First patch paravirt functions, such that we overwrite the indirect - * call with the direct call. - */ - apply_paravirt(__parainstructions, __parainstructions_end); - __apply_fineibt(__retpoline_sites, __retpoline_sites_end, __cfi_sites, __cfi_sites_end, true); @@ -1618,10 +1632,6 @@ void __init alternative_instructions(void) apply_retpolines(__retpoline_sites, __retpoline_sites_end); apply_returns(__return_sites, __return_sites_end); - /* - * Then patch alternatives, such that those paravirt calls that are in - * alternatives can be overwritten by their immediate fragments. - */ apply_alternatives(__alt_instructions, __alt_instructions_end); /* @@ -1685,8 +1695,8 @@ void __init_or_module text_poke_early(void *addr, const void *opcode, } else { local_irq_save(flags); memcpy(addr, opcode, len); - local_irq_restore(flags); sync_core(); + local_irq_restore(flags); /* * Could also do a CLFLUSH here to speed up CPU recovery; but diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index e9ad518a5003..64ad2ddea121 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -233,14 +233,13 @@ patch_call_sites(s32 *start, s32 *end, const struct core_text *ct) } static __init_or_module void -patch_paravirt_call_sites(struct paravirt_patch_site *start, - struct paravirt_patch_site *end, - const struct core_text *ct) +patch_alt_call_sites(struct alt_instr *start, struct alt_instr *end, + const struct core_text *ct) { - struct paravirt_patch_site *p; + struct alt_instr *a; - for (p = start; p < end; p++) - patch_call(p->instr, ct); + for (a = start; a < end; a++) + patch_call((void *)&a->instr_offset + a->instr_offset, ct); } static __init_or_module void @@ -248,7 +247,7 @@ callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct) { prdbg("Patching call sites %s\n", ct->name); patch_call_sites(cs->call_start, cs->call_end, ct); - patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct); + patch_alt_call_sites(cs->alt_start, cs->alt_end, ct); prdbg("Patching call sites done%s\n", ct->name); } @@ -257,8 +256,8 @@ void __init callthunks_patch_builtin_calls(void) struct callthunk_sites cs = { .call_start = __call_sites, .call_end = __call_sites_end, - .pv_start = __parainstructions, - .pv_end = __parainstructions_end + .alt_start = __alt_instructions, + .alt_end = __alt_instructions_end }; if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a7eab05e5f29..9f42d1c59e09 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -34,87 +34,6 @@ */ static u32 nodes_per_socket = 1; -/* - * AMD errata checking - * - * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or - * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that - * have an OSVW id assigned, which it takes as first argument. Both take a - * variable number of family-specific model-stepping ranges created by - * AMD_MODEL_RANGE(). - * - * Example: - * - * const int amd_erratum_319[] = - * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), - * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), - * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); - */ - -#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 } -#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 } -#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \ - ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end)) -#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff) -#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff) -#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff) - -static const int amd_erratum_400[] = - AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), - AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); - -static const int amd_erratum_383[] = - AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); - -/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ -static const int amd_erratum_1054[] = - AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); - -static const int amd_zenbleed[] = - AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf), - AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), - AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf), - AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); - -static const int amd_div0[] = - AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), - AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); - -static const int amd_erratum_1485[] = - AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), - AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); - -static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) -{ - int osvw_id = *erratum++; - u32 range; - u32 ms; - - if (osvw_id >= 0 && osvw_id < 65536 && - cpu_has(cpu, X86_FEATURE_OSVW)) { - u64 osvw_len; - - rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); - if (osvw_id < osvw_len) { - u64 osvw_bits; - - rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), - osvw_bits); - return osvw_bits & (1ULL << (osvw_id & 0x3f)); - } - } - - /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 4) | cpu->x86_stepping; - while ((range = *erratum++)) - if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && - (ms >= AMD_MODEL_RANGE_START(range)) && - (ms <= AMD_MODEL_RANGE_END(range))) - return true; - - return false; -} - static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) { u32 gprs[8] = { 0 }; @@ -616,6 +535,49 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } resctrl_cpu_detect(c); + + /* Figure out Zen generations: */ + switch (c->x86) { + case 0x17: { + switch (c->x86_model) { + case 0x00 ... 0x2f: + case 0x50 ... 0x5f: + setup_force_cpu_cap(X86_FEATURE_ZEN1); + break; + case 0x30 ... 0x4f: + case 0x60 ... 0x7f: + case 0x90 ... 0x91: + case 0xa0 ... 0xaf: + setup_force_cpu_cap(X86_FEATURE_ZEN2); + break; + default: + goto warn; + } + break; + } + case 0x19: { + switch (c->x86_model) { + case 0x00 ... 0x0f: + case 0x20 ... 0x5f: + setup_force_cpu_cap(X86_FEATURE_ZEN3); + break; + case 0x10 ... 0x1f: + case 0x60 ... 0xaf: + setup_force_cpu_cap(X86_FEATURE_ZEN4); + break; + default: + goto warn; + } + break; + } + default: + break; + } + + return; + +warn: + WARN_ONCE(1, "Family 0x%x, model: 0x%x??\n", c->x86, c->x86_model); } static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) @@ -739,15 +701,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); - /* - * Check whether the machine is affected by erratum 400. This is - * used to select the proper idle routine and to enable the check - * whether the machine is affected in arch_post_acpi_init(), which - * sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. - */ - if (cpu_has_amd_erratum(c, amd_erratum_400)) - set_cpu_bug(c, X86_BUG_AMD_E400); - early_detect_mem_encrypt(c); /* Re-enable TopologyExtensions if switched off by BIOS */ @@ -814,6 +767,16 @@ static void init_amd_k8(struct cpuinfo_x86 *c) msr_set_bit(MSR_K7_HWCR, 6); #endif set_cpu_bug(c, X86_BUG_SWAPGS_FENCE); + + /* + * Check models and steppings affected by erratum 400. This is + * used to select the proper idle routine and to enable the + * check whether the machine is affected in arch_post_acpi_subsys_init() + * which sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. + */ + if (c->x86_model > 0x41 || + (c->x86_model == 0x41 && c->x86_stepping >= 0x2)) + setup_force_cpu_bug(X86_BUG_AMD_E400); } static void init_amd_gh(struct cpuinfo_x86 *c) @@ -847,8 +810,17 @@ static void init_amd_gh(struct cpuinfo_x86 *c) */ msr_clear_bit(MSR_AMD64_BU_CFG2, 24); - if (cpu_has_amd_erratum(c, amd_erratum_383)) - set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); + set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); + + /* + * Check models and steppings affected by erratum 400. This is + * used to select the proper idle routine and to enable the + * check whether the machine is affected in arch_post_acpi_subsys_init() + * which sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. + */ + if (c->x86_model > 0x2 || + (c->x86_model == 0x2 && c->x86_stepping >= 0x1)) + setup_force_cpu_bug(X86_BUG_AMD_E400); } static void init_amd_ln(struct cpuinfo_x86 *c) @@ -941,6 +913,19 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +static void fix_erratum_1386(struct cpuinfo_x86 *c) +{ + /* + * Work around Erratum 1386. The XSAVES instruction malfunctions in + * certain circumstances on Zen1/2 uarch, and not all parts have had + * updated microcode at the time of writing (March 2023). + * + * Affected parts all have no supervisor XSAVE states, meaning that + * the XSAVEC instruction (which works fine) is equivalent. + */ + clear_cpu_cap(c, X86_FEATURE_XSAVES); +} + void init_spectral_chicken(struct cpuinfo_x86 *c) { #ifdef CONFIG_CPU_UNRET_ENTRY @@ -951,34 +936,28 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) * * This suppresses speculation from the middle of a basic block, i.e. it * suppresses non-branch predictions. - * - * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H */ - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); } } #endif - /* - * Work around Erratum 1386. The XSAVES instruction malfunctions in - * certain circumstances on Zen1/2 uarch, and not all parts have had - * updated microcode at the time of writing (March 2023). - * - * Affected parts all have no supervisor XSAVE states, meaning that - * the XSAVEC instruction (which works fine) is equivalent. - */ - clear_cpu_cap(c, X86_FEATURE_XSAVES); } -static void init_amd_zn(struct cpuinfo_x86 *c) +static void init_amd_zen_common(void) { - set_cpu_cap(c, X86_FEATURE_ZEN); - + setup_force_cpu_cap(X86_FEATURE_ZEN); #ifdef CONFIG_NUMA node_reclaim_distance = 32; #endif +} + +static void init_amd_zen1(struct cpuinfo_x86 *c) +{ + init_amd_zen_common(); + fix_erratum_1386(c); /* Fix up CPUID bits, but only if not virtualised. */ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { @@ -986,15 +965,10 @@ static void init_amd_zn(struct cpuinfo_x86 *c) /* Erratum 1076: CPB feature bit not being set in CPUID. */ if (!cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); - - /* - * Zen3 (Fam19 model < 0x10) parts are not susceptible to - * Branch Type Confusion, but predate the allocation of the - * BTC_NO bit. - */ - if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) - set_cpu_cap(c, X86_FEATURE_BTC_NO); } + + pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); + setup_force_cpu_bug(X86_BUG_DIV0); } static bool cpu_has_zenbleed_microcode(void) @@ -1018,11 +992,8 @@ static bool cpu_has_zenbleed_microcode(void) return true; } -static void zenbleed_check(struct cpuinfo_x86 *c) +static void zen2_zenbleed_check(struct cpuinfo_x86 *c) { - if (!cpu_has_amd_erratum(c, amd_zenbleed)) - return; - if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return; @@ -1037,6 +1008,37 @@ static void zenbleed_check(struct cpuinfo_x86 *c) } } +static void init_amd_zen2(struct cpuinfo_x86 *c) +{ + init_amd_zen_common(); + init_spectral_chicken(c); + fix_erratum_1386(c); + zen2_zenbleed_check(c); +} + +static void init_amd_zen3(struct cpuinfo_x86 *c) +{ + init_amd_zen_common(); + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + /* + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. + */ + if (!cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } +} + +static void init_amd_zen4(struct cpuinfo_x86 *c) +{ + init_amd_zen_common(); + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); +} + static void init_amd(struct cpuinfo_x86 *c) { u64 vm_cr; @@ -1072,11 +1074,17 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; - case 0x17: init_spectral_chicken(c); - fallthrough; - case 0x19: init_amd_zn(c); break; } + if (boot_cpu_has(X86_FEATURE_ZEN1)) + init_amd_zen1(c); + else if (boot_cpu_has(X86_FEATURE_ZEN2)) + init_amd_zen2(c); + else if (boot_cpu_has(X86_FEATURE_ZEN3)) + init_amd_zen3(c); + else if (boot_cpu_has(X86_FEATURE_ZEN4)) + init_amd_zen4(c); + /* * Enable workaround for FXSAVE leak on CPUs * without a XSaveErPtr feature @@ -1136,7 +1144,7 @@ static void init_amd(struct cpuinfo_x86 *c) * Counter May Be Inaccurate". */ if (cpu_has(c, X86_FEATURE_IRPERF) && - !cpu_has_amd_erratum(c, amd_erratum_1054)) + (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); @@ -1152,16 +1160,8 @@ static void init_amd(struct cpuinfo_x86 *c) cpu_has(c, X86_FEATURE_AUTOIBRS)) WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); - zenbleed_check(c); - - if (cpu_has_amd_erratum(c, amd_div0)) { - pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); - setup_force_cpu_bug(X86_BUG_DIV0); - } - - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && - cpu_has_amd_erratum(c, amd_erratum_1485)) - msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); + /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ + clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); } #ifdef CONFIG_X86_32 @@ -1315,11 +1315,14 @@ static void zenbleed_check_cpu(void *unused) { struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); - zenbleed_check(c); + zen2_zenbleed_check(c); } void amd_check_microcode(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + on_each_cpu(zenbleed_check_cpu, NULL, 1); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b14fc8c1c953..98f7ea6b931c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1856,6 +1856,13 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); #endif + + /* + * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and + * Hygon will clear it in ->c_init() below. + */ + set_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 6f247d66758d..f0cd95502faa 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -354,6 +354,9 @@ static void init_hygon(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); check_null_seg_clears_base(c); + + /* Hygon CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ + clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); } static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index e4c3ba91321c..f18d35fe27a9 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -237,4 +237,4 @@ err_out_online: cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); return ret; } -subsys_initcall(intel_epb_init); +late_initcall(intel_epb_init); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index f3517b8a8e91..2b46eb0fdf3a 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -87,42 +87,40 @@ struct smca_bank { static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks); static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts); -struct smca_bank_name { - const char *name; /* Short name for sysfs */ - const char *long_name; /* Long name for pretty-printing */ -}; - -static struct smca_bank_name smca_names[] = { - [SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" }, - [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, - [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, - [SMCA_DE] = { "decode_unit", "Decode Unit" }, - [SMCA_RESERVED] = { "reserved", "Reserved" }, - [SMCA_EX] = { "execution_unit", "Execution Unit" }, - [SMCA_FP] = { "floating_point", "Floating Point Unit" }, - [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, - [SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, - [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, +static const char * const smca_names[] = { + [SMCA_LS ... SMCA_LS_V2] = "load_store", + [SMCA_IF] = "insn_fetch", + [SMCA_L2_CACHE] = "l2_cache", + [SMCA_DE] = "decode_unit", + [SMCA_RESERVED] = "reserved", + [SMCA_EX] = "execution_unit", + [SMCA_FP] = "floating_point", + [SMCA_L3_CACHE] = "l3_cache", + [SMCA_CS ... SMCA_CS_V2] = "coherent_slave", + [SMCA_PIE] = "pie", /* UMC v2 is separate because both of them can exist in a single system. */ - [SMCA_UMC] = { "umc", "Unified Memory Controller" }, - [SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" }, - [SMCA_PB] = { "param_block", "Parameter Block" }, - [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, - [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" }, - [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, - [SMCA_MPDMA] = { "mpdma", "MPDMA Unit" }, - [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, - [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" }, - [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" }, - [SMCA_NBIF] = { "nbif", "NBIF Unit" }, - [SMCA_SHUB] = { "shub", "System Hub Unit" }, - [SMCA_SATA] = { "sata", "SATA Unit" }, - [SMCA_USB] = { "usb", "USB Unit" }, - [SMCA_GMI_PCS] = { "gmi_pcs", "Global Memory Interconnect PCS Unit" }, - [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" }, - [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" }, - [SMCA_GMI_PHY] = { "gmi_phy", "Global Memory Interconnect PHY Unit" }, + [SMCA_UMC] = "umc", + [SMCA_UMC_V2] = "umc_v2", + [SMCA_MA_LLC] = "ma_llc", + [SMCA_PB] = "param_block", + [SMCA_PSP ... SMCA_PSP_V2] = "psp", + [SMCA_SMU ... SMCA_SMU_V2] = "smu", + [SMCA_MP5] = "mp5", + [SMCA_MPDMA] = "mpdma", + [SMCA_NBIO] = "nbio", + [SMCA_PCIE ... SMCA_PCIE_V2] = "pcie", + [SMCA_XGMI_PCS] = "xgmi_pcs", + [SMCA_NBIF] = "nbif", + [SMCA_SHUB] = "shub", + [SMCA_SATA] = "sata", + [SMCA_USB] = "usb", + [SMCA_USR_DP] = "usr_dp", + [SMCA_USR_CP] = "usr_cp", + [SMCA_GMI_PCS] = "gmi_pcs", + [SMCA_XGMI_PHY] = "xgmi_phy", + [SMCA_WAFL_PHY] = "wafl_phy", + [SMCA_GMI_PHY] = "gmi_phy", }; static const char *smca_get_name(enum smca_bank_types t) @@ -130,17 +128,8 @@ static const char *smca_get_name(enum smca_bank_types t) if (t >= N_SMCA_BANK_TYPES) return NULL; - return smca_names[t].name; -} - -const char *smca_get_long_name(enum smca_bank_types t) -{ - if (t >= N_SMCA_BANK_TYPES) - return NULL; - - return smca_names[t].long_name; + return smca_names[t]; } -EXPORT_SYMBOL_GPL(smca_get_long_name); enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank) { @@ -178,6 +167,7 @@ static const struct smca_hwid smca_hwid_mcatypes[] = { { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) }, { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) }, { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) }, + { SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) }, /* Unified Memory Controller MCA type */ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) }, @@ -212,6 +202,8 @@ static const struct smca_hwid smca_hwid_mcatypes[] = { { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) }, { SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) }, { SMCA_USB, HWID_MCATYPE(0xAA, 0x0) }, + { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) }, + { SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) }, { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) }, { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) }, { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) }, diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 7b397370b4d6..fd5ce12c4f9a 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -44,6 +44,7 @@ #include <linux/sync_core.h> #include <linux/task_work.h> #include <linux/hardirq.h> +#include <linux/kexec.h> #include <asm/intel-family.h> #include <asm/processor.h> @@ -233,6 +234,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) struct llist_node *pending; struct mce_evt_llist *l; int apei_err = 0; + struct page *p; /* * Allow instrumentation around external facilities usage. Not that it @@ -286,6 +288,20 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) if (!fake_panic) { if (panic_timeout == 0) panic_timeout = mca_cfg.panic_timeout; + + /* + * Kdump skips the poisoned page in order to avoid + * touching the error bits again. Poison the page even + * if the error is fatal and the machine is about to + * panic. + */ + if (kexec_crash_loaded()) { + if (final && (final->status & MCI_STATUS_ADDRV)) { + p = pfn_to_online_page(final->addr >> PAGE_SHIFT); + if (p) + SetPageHWPoison(p); + } + } panic(msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); @@ -670,6 +686,16 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) barrier(); m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); + /* + * Update storm tracking here, before checking for the + * MCI_STATUS_VAL bit. Valid corrected errors count + * towards declaring, or maintaining, storm status. No + * error in a bank counts towards avoiding, or ending, + * storm status. + */ + if (!mca_cfg.cmci_disabled) + mce_track_storm(&m); + /* If this entry is not valid, ignore it */ if (!(m.status & MCI_STATUS_VAL)) continue; @@ -1601,13 +1627,6 @@ static unsigned long check_interval = INITIAL_CHECK_INTERVAL; static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); -static unsigned long mce_adjust_timer_default(unsigned long interval) -{ - return interval; -} - -static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; - static void __start_timer(struct timer_list *t, unsigned long interval) { unsigned long when = jiffies + interval; @@ -1637,15 +1656,9 @@ static void mce_timer_fn(struct timer_list *t) iv = __this_cpu_read(mce_next_interval); - if (mce_available(this_cpu_ptr(&cpu_info))) { + if (mce_available(this_cpu_ptr(&cpu_info))) mc_poll_banks(); - if (mce_intel_cmci_poll()) { - iv = mce_adjust_timer(iv); - goto done; - } - } - /* * Alert userspace if needed. If we logged an MCE, reduce the polling * interval, otherwise increase the polling interval. @@ -1655,23 +1668,29 @@ static void mce_timer_fn(struct timer_list *t) else iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); -done: - __this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (mce_get_storm_mode()) { + __start_timer(t, HZ); + } else { + __this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } /* - * Ensure that the timer is firing in @interval from now. + * When a storm starts on any bank on this CPU, switch to polling + * once per second. When the storm ends, revert to the default + * polling interval. */ -void mce_timer_kick(unsigned long interval) +void mce_timer_kick(bool storm) { struct timer_list *t = this_cpu_ptr(&mce_timer); - unsigned long iv = __this_cpu_read(mce_next_interval); - __start_timer(t, interval); + mce_set_storm_mode(storm); - if (interval < iv) - __this_cpu_write(mce_next_interval, interval); + if (storm) + __start_timer(t, HZ); + else + __this_cpu_write(mce_next_interval, check_interval * HZ); } /* Must not be called in IRQ context where del_timer_sync() can deadlock */ @@ -1995,7 +2014,6 @@ static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) intel_init_cmci(); intel_init_lmce(); - mce_adjust_timer = cmci_intel_adjust_timer; } static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c) @@ -2008,7 +2026,6 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: mce_intel_feature_init(c); - mce_adjust_timer = cmci_intel_adjust_timer; break; case X86_VENDOR_AMD: { @@ -2568,9 +2585,6 @@ static int mce_device_create(unsigned int cpu) int err; int i, j; - if (!mce_available(&boot_cpu_data)) - return -EIO; - dev = per_cpu(mce_device, cpu); if (dev) return 0; @@ -2665,8 +2679,6 @@ static void mce_reenable_cpu(void) static int mce_cpu_dead(unsigned int cpu) { - mce_intel_hcpu_update(cpu); - /* intentionally ignoring frozen here */ if (!cpuhp_tasks_frozen) cmci_rediscover(); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 4d8d4bcf915d..72f0695c3dc1 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -746,6 +746,7 @@ static void check_hw_inj_possible(void) wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status); rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status); + wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), 0); if (!status) { hw_injection_possible = false; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 52bce533ddcc..399b62e223d2 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -42,15 +42,6 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); /* - * CMCI storm detection backoff counter - * - * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've - * encountered an error. If not, we decrement it by one. We signal the end of - * the CMCI storm when it reaches 0. - */ -static DEFINE_PER_CPU(int, cmci_backoff_cnt); - -/* * cmci_discover_lock protects against parallel discovery attempts * which could race against each other. */ @@ -63,22 +54,26 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock); */ static DEFINE_SPINLOCK(cmci_poll_lock); +/* Linux non-storm CMCI threshold (may be overridden by BIOS) */ #define CMCI_THRESHOLD 1 -#define CMCI_POLL_INTERVAL (30 * HZ) -#define CMCI_STORM_INTERVAL (HZ) -#define CMCI_STORM_THRESHOLD 15 -static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); -static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt); -static DEFINE_PER_CPU(unsigned int, cmci_storm_state); - -enum { - CMCI_STORM_NONE, - CMCI_STORM_ACTIVE, - CMCI_STORM_SUBSIDED, -}; +/* + * MCi_CTL2 threshold for each bank when there is no storm. + * Default value for each bank may have been set by BIOS. + */ +static u16 cmci_threshold[MAX_NR_BANKS]; -static atomic_t cmci_storm_on_cpus; +/* + * High threshold to limit CMCI rate during storms. Max supported is + * 0x7FFF. Use this slightly smaller value so it has a distinctive + * signature when some asks "Why am I not seeing all corrected errors?" + * A high threshold is used instead of just disabling CMCI for a + * bank because both corrected and uncorrected errors may be logged + * in the same bank and signalled with CMCI. The threshold only applies + * to corrected errors, so keeping CMCI enabled means that uncorrected + * errors will still be processed in a timely fashion. + */ +#define CMCI_STORM_THRESHOLD 32749 static int cmci_supported(int *banks) { @@ -134,204 +129,166 @@ static bool lmce_supported(void) return tmp & FEAT_CTL_LMCE_ENABLED; } -bool mce_intel_cmci_poll(void) +/* + * Set a new CMCI threshold value. Preserve the state of the + * MCI_CTL2_CMCI_EN bit in case this happens during a + * cmci_rediscover() operation. + */ +static void cmci_set_threshold(int bank, int thresh) { - if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) - return false; - - /* - * Reset the counter if we've logged an error in the last poll - * during the storm. - */ - if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned))) - this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); - else - this_cpu_dec(cmci_backoff_cnt); + unsigned long flags; + u64 val; - return true; + raw_spin_lock_irqsave(&cmci_discover_lock, flags); + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; + wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } -void mce_intel_hcpu_update(unsigned long cpu) +void mce_intel_handle_storm(int bank, bool on) { - if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE) - atomic_dec(&cmci_storm_on_cpus); + if (on) + cmci_set_threshold(bank, CMCI_STORM_THRESHOLD); + else + cmci_set_threshold(bank, cmci_threshold[bank]); +} - per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; +/* + * The interrupt handler. This is called on every event. + * Just call the poller directly to log any events. + * This could in theory increase the threshold under high load, + * but doesn't for now. + */ +static void intel_threshold_interrupt(void) +{ + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); } -static void cmci_toggle_interrupt_mode(bool on) +/* + * Check all the reasons why current CPU cannot claim + * ownership of a bank. + * 1: CPU already owns this bank + * 2: BIOS owns this bank + * 3: Some other CPU owns this bank + */ +static bool cmci_skip_bank(int bank, u64 *val) { - unsigned long flags, *owned; - int bank; - u64 val; + unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned); - raw_spin_lock_irqsave(&cmci_discover_lock, flags); - owned = this_cpu_ptr(mce_banks_owned); - for_each_set_bit(bank, owned, MAX_NR_BANKS) { - rdmsrl(MSR_IA32_MCx_CTL2(bank), val); + if (test_bit(bank, owned)) + return true; - if (on) - val |= MCI_CTL2_CMCI_EN; - else - val &= ~MCI_CTL2_CMCI_EN; + /* Skip banks in firmware first mode */ + if (test_bit(bank, mce_banks_ce_disabled)) + return true; - wrmsrl(MSR_IA32_MCx_CTL2(bank), val); - } - raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); -} + rdmsrl(MSR_IA32_MCx_CTL2(bank), *val); -unsigned long cmci_intel_adjust_timer(unsigned long interval) -{ - if ((this_cpu_read(cmci_backoff_cnt) > 0) && - (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) { - mce_notify_irq(); - return CMCI_STORM_INTERVAL; + /* Already owned by someone else? */ + if (*val & MCI_CTL2_CMCI_EN) { + clear_bit(bank, owned); + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); + return true; } - switch (__this_cpu_read(cmci_storm_state)) { - case CMCI_STORM_ACTIVE: - - /* - * We switch back to interrupt mode once the poll timer has - * silenced itself. That means no events recorded and the timer - * interval is back to our poll interval. - */ - __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); - if (!atomic_sub_return(1, &cmci_storm_on_cpus)) - pr_notice("CMCI storm subsided: switching to interrupt mode\n"); + return false; +} - fallthrough; +/* + * Decide which CMCI interrupt threshold to use: + * 1: If this bank is in storm mode from whichever CPU was + * the previous owner, stay in storm mode. + * 2: If ignoring any threshold set by BIOS, set Linux default + * 3: Try to honor BIOS threshold (unless buggy BIOS set it at zero). + */ +static u64 cmci_pick_threshold(u64 val, int *bios_zero_thresh) +{ + if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) + return val; - case CMCI_STORM_SUBSIDED: + if (!mca_cfg.bios_cmci_threshold) { + val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; + val |= CMCI_THRESHOLD; + } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { /* - * We wait for all CPUs to go back to SUBSIDED state. When that - * happens we switch back to interrupt mode. + * If bios_cmci_threshold boot option was specified + * but the threshold is zero, we'll try to initialize + * it to 1. */ - if (!atomic_read(&cmci_storm_on_cpus)) { - __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); - cmci_toggle_interrupt_mode(true); - cmci_recheck(); - } - return CMCI_POLL_INTERVAL; - default: - - /* We have shiny weather. Let the poll do whatever it thinks. */ - return interval; + *bios_zero_thresh = 1; + val |= CMCI_THRESHOLD; } + + return val; } -static bool cmci_storm_detect(void) +/* + * Try to claim ownership of a bank. + */ +static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_wrong_thresh) { - unsigned int cnt = __this_cpu_read(cmci_storm_cnt); - unsigned long ts = __this_cpu_read(cmci_time_stamp); - unsigned long now = jiffies; - int r; + struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); - if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE) - return true; + val |= MCI_CTL2_CMCI_EN; + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); - if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) { - cnt++; - } else { - cnt = 1; - __this_cpu_write(cmci_time_stamp, now); + /* If the enable bit did not stick, this bank should be polled. */ + if (!(val & MCI_CTL2_CMCI_EN)) { + WARN_ON(!test_bit(bank, this_cpu_ptr(mce_poll_banks))); + storm->banks[bank].poll_only = true; + return; } - __this_cpu_write(cmci_storm_cnt, cnt); - if (cnt <= CMCI_STORM_THRESHOLD) - return false; - - cmci_toggle_interrupt_mode(false); - __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); - r = atomic_add_return(1, &cmci_storm_on_cpus); - mce_timer_kick(CMCI_STORM_INTERVAL); - this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); + /* This CPU successfully set the enable bit. */ + set_bit(bank, (void *)this_cpu_ptr(&mce_banks_owned)); - if (r == 1) - pr_notice("CMCI storm detected: switching to poll mode\n"); - return true; -} + if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) { + pr_notice("CPU%d BANK%d CMCI inherited storm\n", smp_processor_id(), bank); + mce_inherit_storm(bank); + cmci_storm_begin(bank); + } else { + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); + } -/* - * The interrupt handler. This is called on every event. - * Just call the poller directly to log any events. - * This could in theory increase the threshold under high load, - * but doesn't for now. - */ -static void intel_threshold_interrupt(void) -{ - if (cmci_storm_detect()) - return; + /* + * We are able to set thresholds for some banks that + * had a threshold of 0. This means the BIOS has not + * set the thresholds properly or does not work with + * this boot option. Note down now and report later. + */ + if (mca_cfg.bios_cmci_threshold && bios_zero_thresh && + (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) + *bios_wrong_thresh = 1; - machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); + /* Save default threshold for each bank */ + if (cmci_threshold[bank] == 0) + cmci_threshold[bank] = val & MCI_CTL2_CMCI_THRESHOLD_MASK; } /* * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks * on this CPU. Use the algorithm recommended in the SDM to discover shared - * banks. + * banks. Called during initial bootstrap, and also for hotplug CPU operations + * to rediscover/reassign machine check banks. */ static void cmci_discover(int banks) { - unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned); + int bios_wrong_thresh = 0; unsigned long flags; int i; - int bios_wrong_thresh = 0; raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; int bios_zero_thresh = 0; - if (test_bit(i, owned)) + if (cmci_skip_bank(i, &val)) continue; - /* Skip banks in firmware first mode */ - if (test_bit(i, mce_banks_ce_disabled)) - continue; - - rdmsrl(MSR_IA32_MCx_CTL2(i), val); - - /* Already owned by someone else? */ - if (val & MCI_CTL2_CMCI_EN) { - clear_bit(i, owned); - __clear_bit(i, this_cpu_ptr(mce_poll_banks)); - continue; - } - - if (!mca_cfg.bios_cmci_threshold) { - val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; - val |= CMCI_THRESHOLD; - } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { - /* - * If bios_cmci_threshold boot option was specified - * but the threshold is zero, we'll try to initialize - * it to 1. - */ - bios_zero_thresh = 1; - val |= CMCI_THRESHOLD; - } - - val |= MCI_CTL2_CMCI_EN; - wrmsrl(MSR_IA32_MCx_CTL2(i), val); - rdmsrl(MSR_IA32_MCx_CTL2(i), val); - - /* Did the enable bit stick? -- the bank supports CMCI */ - if (val & MCI_CTL2_CMCI_EN) { - set_bit(i, owned); - __clear_bit(i, this_cpu_ptr(mce_poll_banks)); - /* - * We are able to set thresholds for some banks that - * had a threshold of 0. This means the BIOS has not - * set the thresholds properly or does not work with - * this boot option. Note down now and report later. - */ - if (mca_cfg.bios_cmci_threshold && bios_zero_thresh && - (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) - bios_wrong_thresh = 1; - } else { - WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks))); - } + val = cmci_pick_threshold(val, &bios_zero_thresh); + cmci_claim_bank(i, val, bios_zero_thresh, &bios_wrong_thresh); } raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { @@ -370,6 +327,9 @@ static void __cmci_disable_bank(int bank) val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(bank), val); __clear_bit(bank, this_cpu_ptr(mce_banks_owned)); + + if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD) + cmci_storm_end(bank); } /* diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index e13a26c9c0ac..01f8f03969e6 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -41,9 +41,7 @@ struct dentry *mce_get_debugfs_dir(void); extern mce_banks_t mce_banks_ce_disabled; #ifdef CONFIG_X86_MCE_INTEL -unsigned long cmci_intel_adjust_timer(unsigned long interval); -bool mce_intel_cmci_poll(void); -void mce_intel_hcpu_update(unsigned long cpu); +void mce_intel_handle_storm(int bank, bool on); void cmci_disable_bank(int bank); void intel_init_cmci(void); void intel_init_lmce(void); @@ -51,9 +49,7 @@ void intel_clear_lmce(void); bool intel_filter_mce(struct mce *m); bool intel_mce_usable_address(struct mce *m); #else -# define cmci_intel_adjust_timer mce_adjust_timer_default -static inline bool mce_intel_cmci_poll(void) { return false; } -static inline void mce_intel_hcpu_update(unsigned long cpu) { } +static inline void mce_intel_handle_storm(int bank, bool on) { } static inline void cmci_disable_bank(int bank) { } static inline void intel_init_cmci(void) { } static inline void intel_init_lmce(void) { } @@ -62,7 +58,63 @@ static inline bool intel_filter_mce(struct mce *m) { return false; } static inline bool intel_mce_usable_address(struct mce *m) { return false; } #endif -void mce_timer_kick(unsigned long interval); +void mce_timer_kick(bool storm); + +#ifdef CONFIG_X86_MCE_THRESHOLD +void cmci_storm_begin(unsigned int bank); +void cmci_storm_end(unsigned int bank); +void mce_track_storm(struct mce *mce); +void mce_inherit_storm(unsigned int bank); +bool mce_get_storm_mode(void); +void mce_set_storm_mode(bool storm); +#else +static inline void cmci_storm_begin(unsigned int bank) {} +static inline void cmci_storm_end(unsigned int bank) {} +static inline void mce_track_storm(struct mce *mce) {} +static inline void mce_inherit_storm(unsigned int bank) {} +static inline bool mce_get_storm_mode(void) { return false; } +static inline void mce_set_storm_mode(bool storm) {} +#endif + +/* + * history: Bitmask tracking errors occurrence. Each set bit + * represents an error seen. + * + * timestamp: Last time (in jiffies) that the bank was polled. + * in_storm_mode: Is this bank in storm mode? + * poll_only: Bank does not support CMCI, skip storm tracking. + */ +struct storm_bank { + u64 history; + u64 timestamp; + bool in_storm_mode; + bool poll_only; +}; + +#define NUM_HISTORY_BITS (sizeof(u64) * BITS_PER_BYTE) + +/* How many errors within the history buffer mark the start of a storm. */ +#define STORM_BEGIN_THRESHOLD 5 + +/* + * How many polls of machine check bank without an error before declaring + * the storm is over. Since it is tracked by the bitmasks in the history + * field of struct storm_bank the mask is 30 bits [0 ... 29]. + */ +#define STORM_END_POLL_THRESHOLD 29 + +/* + * banks: per-cpu, per-bank details + * stormy_bank_count: count of MC banks in storm state + * poll_mode: CPU is in poll mode + */ +struct mca_storm_desc { + struct storm_bank banks[MAX_NR_BANKS]; + u8 stormy_bank_count; + bool poll_mode; +}; + +DECLARE_PER_CPU(struct mca_storm_desc, storm_desc); #ifdef CONFIG_ACPI_APEI int apei_write_mce(struct mce *m); diff --git a/arch/x86/kernel/cpu/mce/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c index ef4e7bb5fd88..89e31e1e5c9c 100644 --- a/arch/x86/kernel/cpu/mce/threshold.c +++ b/arch/x86/kernel/cpu/mce/threshold.c @@ -29,3 +29,118 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_threshold) trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); apic_eoi(); } + +DEFINE_PER_CPU(struct mca_storm_desc, storm_desc); + +void mce_inherit_storm(unsigned int bank) +{ + struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); + + /* + * Previous CPU owning this bank had put it into storm mode, + * but the precise history of that storm is unknown. Assume + * the worst (all recent polls of the bank found a valid error + * logged). This will avoid the new owner prematurely declaring + * the storm has ended. + */ + storm->banks[bank].history = ~0ull; + storm->banks[bank].timestamp = jiffies; +} + +bool mce_get_storm_mode(void) +{ + return __this_cpu_read(storm_desc.poll_mode); +} + +void mce_set_storm_mode(bool storm) +{ + __this_cpu_write(storm_desc.poll_mode, storm); +} + +static void mce_handle_storm(unsigned int bank, bool on) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + mce_intel_handle_storm(bank, on); + break; + } +} + +void cmci_storm_begin(unsigned int bank) +{ + struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); + + __set_bit(bank, this_cpu_ptr(mce_poll_banks)); + storm->banks[bank].in_storm_mode = true; + + /* + * If this is the first bank on this CPU to enter storm mode + * start polling. + */ + if (++storm->stormy_bank_count == 1) + mce_timer_kick(true); +} + +void cmci_storm_end(unsigned int bank) +{ + struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); + + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); + storm->banks[bank].history = 0; + storm->banks[bank].in_storm_mode = false; + + /* If no banks left in storm mode, stop polling. */ + if (!this_cpu_dec_return(storm_desc.stormy_bank_count)) + mce_timer_kick(false); +} + +void mce_track_storm(struct mce *mce) +{ + struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); + unsigned long now = jiffies, delta; + unsigned int shift = 1; + u64 history = 0; + + /* No tracking needed for banks that do not support CMCI */ + if (storm->banks[mce->bank].poll_only) + return; + + /* + * When a bank is in storm mode it is polled once per second and + * the history mask will record about the last minute of poll results. + * If it is not in storm mode, then the bank is only checked when + * there is a CMCI interrupt. Check how long it has been since + * this bank was last checked, and adjust the amount of "shift" + * to apply to history. + */ + if (!storm->banks[mce->bank].in_storm_mode) { + delta = now - storm->banks[mce->bank].timestamp; + shift = (delta + HZ) / HZ; + } + + /* If it has been a long time since the last poll, clear history. */ + if (shift < NUM_HISTORY_BITS) + history = storm->banks[mce->bank].history << shift; + + storm->banks[mce->bank].timestamp = now; + + /* History keeps track of corrected errors. VAL=1 && UC=0 */ + if ((mce->status & MCI_STATUS_VAL) && mce_is_correctable(mce)) + history |= 1; + + storm->banks[mce->bank].history = history; + + if (storm->banks[mce->bank].in_storm_mode) { + if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD, 0)) + return; + printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce->bank); + mce_handle_storm(mce->bank, false); + cmci_storm_end(mce->bank); + } else { + if (hweight64(history) < STORM_BEGIN_THRESHOLD) + return; + printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce->bank); + mce_handle_storm(mce->bank, true); + cmci_storm_begin(mce->bank); + } +} diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 070426b9895f..857e608af641 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -370,14 +370,14 @@ static __init struct microcode_intel *get_microcode_blob(struct ucode_cpu_info * { struct cpio_data cp; + intel_collect_cpu_info(&uci->cpu_sig); + if (!load_builtin_intel_microcode(&cp)) cp = find_microcode_in_initrd(ucode_path); if (!(cp.data && cp.size)) return NULL; - intel_collect_cpu_info(&uci->cpu_sig); - return scan_microcode(cp.data, cp.size, uci, save); } @@ -410,13 +410,13 @@ void __init load_ucode_intel_bsp(struct early_load_data *ed) { struct ucode_cpu_info uci; - ed->old_rev = intel_get_microcode_revision(); - uci.mc = get_microcode_blob(&uci, false); - if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED) - ucode_patch_va = UCODE_BSP_LOADED; + ed->old_rev = uci.cpu_sig.rev; - ed->new_rev = uci.cpu_sig.rev; + if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED) { + ucode_patch_va = UCODE_BSP_LOADED; + ed->new_rev = uci.cpu_sig.rev; + } } void load_ucode_intel_ap(void) @@ -457,12 +457,6 @@ static enum ucode_state apply_microcode_late(int cpu) if (ret != UCODE_UPDATED && ret != UCODE_OK) return ret; - if (!cpu && uci->cpu_sig.rev != cur_rev) { - pr_info("Updated to revision 0x%x, date = %04x-%02x-%02x\n", - uci->cpu_sig.rev, mc->hdr.date & 0xffff, mc->hdr.date >> 24, - (mc->hdr.date >> 16) & 0xff); - } - cpu_data(cpu).microcode = uci->cpu_sig.rev; if (!cpu) boot_cpu_data.microcode = uci->cpu_sig.rev; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 086a2c3aaaa0..f479e5e793a2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -114,6 +114,28 @@ SYM_CODE_START_NOALIGN(startup_64) /* Form the CR3 value being sure to include the CR3 modifier */ addq $(early_top_pgt - __START_KERNEL_map), %rax + +#ifdef CONFIG_AMD_MEM_ENCRYPT + mov %rax, %rdi + mov %rax, %r14 + + addq phys_base(%rip), %rdi + + /* + * For SEV guests: Verify that the C-bit is correct. A malicious + * hypervisor could lie about the C-bit position to perform a ROP + * attack on the guest by writing to the unencrypted stack and wait for + * the next RET instruction. + */ + call sev_verify_cbit + + /* + * Restore CR3 value without the phys_base which will be added + * below, before writing %cr3. + */ + mov %r14, %rax +#endif + jmp 1f SYM_CODE_END(startup_64) @@ -193,15 +215,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) addq phys_base(%rip), %rax /* - * For SEV guests: Verify that the C-bit is correct. A malicious - * hypervisor could lie about the C-bit position to perform a ROP - * attack on the guest by writing to the unencrypted stack and wait for - * the next RET instruction. - */ - movq %rax, %rdi - call sev_verify_cbit - - /* * Switch to new page-table * * For the boot CPU this switches to early_top_pgt which still has the @@ -255,6 +268,22 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) testl $X2APIC_ENABLE, %eax jnz .Lread_apicid_msr +#ifdef CONFIG_X86_X2APIC + /* + * If system is in X2APIC mode then MMIO base might not be + * mapped causing the MMIO read below to fault. Faults can't + * be handled at that point. + */ + cmpl $0, x2apic_mode(%rip) + jz .Lread_apicid_mmio + + /* Force the AP into X2APIC mode. */ + orl $X2APIC_ENABLE, %eax + wrmsr + jmp .Lread_apicid_msr +#endif + +.Lread_apicid_mmio: /* Read the APIC ID from the fix-mapped MMIO space. */ movq apic_mmio_base(%rip), %rcx addq $APIC_ID, %rcx diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8857abc706e4..660b601f1d6c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = { static const struct idt_data ia32_idt[] __initconst = { #if defined(CONFIG_IA32_EMULATION) - SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), + SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation), #elif defined(CONFIG_X86_32) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), #endif diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index e8babebad7b8..a0ce46c0a2d8 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -576,7 +576,8 @@ static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs) { unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg]; - int3_emulate_call(regs, regs_get_register(regs, offs)); + int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + p->ainsn.size); + int3_emulate_jmp(regs, regs_get_register(regs, offs)); } NOKPROBE_SYMBOL(kprobe_emulate_call_indirect); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 0ddb3bd0f1aa..c461c1a4b6af 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -803,8 +803,8 @@ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long); "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \ "setne %al\n\t" -DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted, - PV_VCPU_PREEMPTED_ASM, .text); +DEFINE_ASM_FUNC(__raw_callee_save___kvm_vcpu_is_preempted, + PV_VCPU_PREEMPTED_ASM, .text); #endif static void __init kvm_guest_init(void) diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 5f71a0cf4399..e18914c0e38a 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -276,7 +276,7 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { const Elf_Shdr *s, *alt = NULL, *locks = NULL, - *para = NULL, *orc = NULL, *orc_ip = NULL, + *orc = NULL, *orc_ip = NULL, *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, *calls = NULL, *cfi = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; @@ -286,8 +286,6 @@ int module_finalize(const Elf_Ehdr *hdr, alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) locks = s; - if (!strcmp(".parainstructions", secstrings + s->sh_name)) - para = s; if (!strcmp(".orc_unwind", secstrings + s->sh_name)) orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) @@ -304,14 +302,6 @@ int module_finalize(const Elf_Ehdr *hdr, ibt_endbr = s; } - /* - * See alternative_instructions() for the ordering rules between the - * various patching types. - */ - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } if (retpolines || cfi) { void *rseg = NULL, *cseg = NULL; unsigned int rsize = 0, csize = 0; @@ -341,7 +331,7 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } - if (calls || para) { + if (calls || alt) { struct callthunk_sites cs = {}; if (calls) { @@ -349,9 +339,9 @@ int module_finalize(const Elf_Ehdr *hdr, cs.call_end = (void *)calls->sh_addr + calls->sh_size; } - if (para) { - cs.pv_start = (void *)para->sh_addr; - cs.pv_end = (void *)para->sh_addr + para->sh_size; + if (alt) { + cs.alt_start = (void *)alt->sh_addr; + cs.alt_end = (void *)alt->sh_addr + alt->sh_size; } callthunks_patch_module_calls(&cs, me); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 97f1436c1a20..5358d43886ad 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -34,14 +34,8 @@ #include <asm/io_bitmap.h> #include <asm/gsseg.h> -/* - * nop stub, which must not clobber anything *including the stack* to - * avoid confusing the entry prologues. - */ -DEFINE_PARAVIRT_ASM(_paravirt_nop, "", .entry.text); - /* stub always returning 0. */ -DEFINE_PARAVIRT_ASM(paravirt_ret0, "xor %eax,%eax", .entry.text); +DEFINE_ASM_FUNC(paravirt_ret0, "xor %eax,%eax", .entry.text); void __init default_banner(void) { @@ -49,26 +43,12 @@ void __init default_banner(void) pv_info.name); } -/* Undefined instruction for dealing with missing ops pointers. */ -noinstr void paravirt_BUG(void) -{ - BUG(); -} - -static unsigned paravirt_patch_call(void *insn_buff, const void *target, - unsigned long addr, unsigned len) -{ - __text_gen_insn(insn_buff, CALL_INSN_OPCODE, - (void *)addr, target, CALL_INSN_SIZE); - return CALL_INSN_SIZE; -} - #ifdef CONFIG_PARAVIRT_XXL -DEFINE_PARAVIRT_ASM(_paravirt_ident_64, "mov %rdi, %rax", .text); -DEFINE_PARAVIRT_ASM(pv_native_save_fl, "pushf; pop %rax", .noinstr.text); -DEFINE_PARAVIRT_ASM(pv_native_irq_disable, "cli", .noinstr.text); -DEFINE_PARAVIRT_ASM(pv_native_irq_enable, "sti", .noinstr.text); -DEFINE_PARAVIRT_ASM(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); +DEFINE_ASM_FUNC(_paravirt_ident_64, "mov %rdi, %rax", .text); +DEFINE_ASM_FUNC(pv_native_save_fl, "pushf; pop %rax", .noinstr.text); +DEFINE_ASM_FUNC(pv_native_irq_disable, "cli", .noinstr.text); +DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text); +DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); @@ -85,28 +65,6 @@ static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) tlb_remove_page(tlb, table); } -unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, - unsigned int len) -{ - /* - * Neat trick to map patch type back to the call within the - * corresponding structure. - */ - void *opfunc = *((void **)&pv_ops + type); - unsigned ret; - - if (opfunc == NULL) - /* If there's no function, patch it with paravirt_BUG() */ - ret = paravirt_patch_call(insn_buff, paravirt_BUG, addr, len); - else if (opfunc == _paravirt_nop) - ret = 0; - else - /* Otherwise call the function. */ - ret = paravirt_patch_call(insn_buff, opfunc, addr, len); - - return ret; -} - struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 70472eebe719..c67285824e82 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1234,10 +1234,6 @@ void setup_ghcb(void) if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) return; - /* First make sure the hypervisor talks a supported protocol. */ - if (!sev_es_negotiate_protocol()) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); - /* * Check whether the runtime #VC exception handler is active. It uses * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). @@ -1255,6 +1251,13 @@ void setup_ghcb(void) } /* + * Make sure the hypervisor talks a supported protocol. + * This gets called only in the BSP boot phase. + */ + if (!sev_es_negotiate_protocol()) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); + + /* * Clear the boot_ghcb. The first exception comes in before the bss * section is cleared. */ diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 54a5596adaa6..a349dbfc6d5a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -267,19 +267,6 @@ SECTIONS } #endif - /* - * start address and size of operations which during runtime - * can be patched with virtualization friendly instructions or - * baremetal native ones. Think page table operations. - * Details in paravirt_types.h - */ - . = ALIGN(8); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - #ifdef CONFIG_RETPOLINE /* * List of instructions that call/jmp/jcc to retpoline thunks diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index ee8c4c3496ed..eea6ea7f14af 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -182,6 +182,7 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file) } static const struct file_operations mmu_rmaps_stat_fops = { + .owner = THIS_MODULE, .open = kvm_mmu_rmaps_stat_open, .read = seq_read, .llseek = seq_lseek, diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 238afd7335e4..4943f6b2bbee 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2388,7 +2388,7 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h if (!eventfd) return HV_STATUS_INVALID_PORT_ID; - eventfd_signal(eventfd, 1); + eventfd_signal(eventfd); return HV_STATUS_SUCCESS; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 4900c078045a..6ee925d66648 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2972,6 +2972,25 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); } + + /* + * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if + * the host/guest supports its use. + * + * guest_can_use() checks a number of requirements on the host/guest to + * ensure that MSR_IA32_XSS is available, but it might report true even + * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host + * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better + * to further check that the guest CPUID actually supports + * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved + * guests will still get intercepted and caught in the normal + * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths. + */ + if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1); + else + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0); } void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 712146312358..a8bd4e909a1e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -103,6 +103,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_IA32_XSS, .always = false }, { .index = MSR_EFER, .always = false }, { .index = MSR_IA32_CR_PAT, .always = false }, { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, @@ -1855,15 +1856,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) bool old_paging = is_paging(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { + if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; - svm->vmcb->save.efer |= EFER_LMA | EFER_LME; + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer |= EFER_LMA | EFER_LME; } if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { vcpu->arch.efer &= ~EFER_LMA; - svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); } } #endif diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index be67ab7fdd10..c409f934c377 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -30,7 +30,7 @@ #define IOPM_SIZE PAGE_SIZE * 3 #define MSRPM_SIZE PAGE_SIZE * 2 -#define MAX_DIRECT_ACCESS_MSRS 46 +#define MAX_DIRECT_ACCESS_MSRS 47 #define MSRPM_OFFSETS 32 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2c924075f6f1..1a3aaa7dafae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5518,8 +5518,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, - sizeof(guest_xsave->region)); + kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, @@ -13031,7 +13031,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_state_protected) return true; - return vcpu->arch.preempted_in_kernel; + if (vcpu != kvm_get_running_vcpu()) + return vcpu->arch.preempted_in_kernel; + + return static_call(kvm_x86_get_cpl)(vcpu) == 0; } unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index e53fad915a62..523bb6df5ac9 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -2088,7 +2088,7 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r) if (ret < 0 && ret != -ENOTCONN) return false; } else { - eventfd_signal(evtchnfd->deliver.eventfd.ctx, 1); + eventfd_signal(evtchnfd->deliver.eventfd.ctx); } *r = 0; diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index cea25ca8b8cf..c9dae65ac01b 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -11,26 +11,23 @@ #include <asm/checksum.h> #include <asm/word-at-a-time.h> -static inline unsigned short from32to16(unsigned a) +static inline __wsum csum_finalize_sum(u64 temp64) { - unsigned short b = a >> 16; - asm("addw %w2,%w0\n\t" - "adcw $0,%w0\n" - : "=r" (b) - : "0" (b), "r" (a)); - return b; + return (__force __wsum)((temp64 + ror64(temp64, 32)) >> 32); } -static inline __wsum csum_tail(u64 temp64, int odd) +static inline unsigned long update_csum_40b(unsigned long sum, const unsigned long m[5]) { - unsigned int result; - - result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); - if (unlikely(odd)) { - result = from32to16(result); - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); - } - return (__force __wsum)result; + asm("addq %1,%0\n\t" + "adcq %2,%0\n\t" + "adcq %3,%0\n\t" + "adcq %4,%0\n\t" + "adcq %5,%0\n\t" + "adcq $0,%0" + :"+r" (sum) + :"m" (m[0]), "m" (m[1]), "m" (m[2]), + "m" (m[3]), "m" (m[4])); + return sum; } /* @@ -47,64 +44,32 @@ static inline __wsum csum_tail(u64 temp64, int odd) __wsum csum_partial(const void *buff, int len, __wsum sum) { u64 temp64 = (__force u64)sum; - unsigned odd; - odd = 1 & (unsigned long) buff; - if (unlikely(odd)) { - if (unlikely(len == 0)) - return sum; - temp64 = ror32((__force u32)sum, 8); - temp64 += (*(unsigned char *)buff << 8); - len--; - buff++; + /* Do two 40-byte chunks in parallel to get better ILP */ + if (likely(len >= 80)) { + u64 temp64_2 = 0; + do { + temp64 = update_csum_40b(temp64, buff); + temp64_2 = update_csum_40b(temp64_2, buff + 40); + buff += 80; + len -= 80; + } while (len >= 80); + + asm("addq %1,%0\n\t" + "adcq $0,%0" + :"+r" (temp64): "r" (temp64_2)); } /* - * len == 40 is the hot case due to IPv6 headers, but annotating it likely() - * has noticeable negative affect on codegen for all other cases with - * minimal performance benefit here. + * len == 40 is the hot case due to IPv6 headers, so return + * early for that exact case without checking the tail bytes. */ - if (len == 40) { - asm("addq 0*8(%[src]),%[res]\n\t" - "adcq 1*8(%[src]),%[res]\n\t" - "adcq 2*8(%[src]),%[res]\n\t" - "adcq 3*8(%[src]),%[res]\n\t" - "adcq 4*8(%[src]),%[res]\n\t" - "adcq $0,%[res]" - : [res] "+r"(temp64) - : [src] "r"(buff), "m"(*(const char(*)[40])buff)); - return csum_tail(temp64, odd); - } - if (unlikely(len >= 64)) { - /* - * Extra accumulators for better ILP in the loop. - */ - u64 tmp_accum, tmp_carries; - - asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t" - "xorl %k[tmp_carries],%k[tmp_carries]\n\t" - "subl $64, %[len]\n\t" - "1:\n\t" - "addq 0*8(%[src]),%[res]\n\t" - "adcq 1*8(%[src]),%[res]\n\t" - "adcq 2*8(%[src]),%[res]\n\t" - "adcq 3*8(%[src]),%[res]\n\t" - "adcl $0,%k[tmp_carries]\n\t" - "addq 4*8(%[src]),%[tmp_accum]\n\t" - "adcq 5*8(%[src]),%[tmp_accum]\n\t" - "adcq 6*8(%[src]),%[tmp_accum]\n\t" - "adcq 7*8(%[src]),%[tmp_accum]\n\t" - "adcl $0,%k[tmp_carries]\n\t" - "addq $64, %[src]\n\t" - "subl $64, %[len]\n\t" - "jge 1b\n\t" - "addq %[tmp_accum],%[res]\n\t" - "adcq %[tmp_carries],%[res]\n\t" - "adcq $0,%[res]" - : [tmp_accum] "=&r"(tmp_accum), - [tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64), - [len] "+r"(len), [src] "+r"(buff) - : "m"(*(const char *)buff)); + if (len >= 40) { + temp64 = update_csum_40b(temp64, buff); + len -= 40; + if (!len) + return csum_finalize_sum(temp64); + buff += 40; } if (len & 32) { @@ -143,7 +108,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) : [res] "+r"(temp64) : [trail] "r"(trail)); } - return csum_tail(temp64, odd); + return csum_finalize_sum(temp64); } EXPORT_SYMBOL(csum_partial); diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c index 92cd8ecc3a2c..40b81c338ae5 100644 --- a/arch/x86/lib/misc.c +++ b/arch/x86/lib/misc.c @@ -8,7 +8,7 @@ */ int num_digits(int val) { - int m = 10; + long long m = 10; int d = 1; if (val < 0) { diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ab778eac1952..679b09cfe241 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1370,6 +1370,8 @@ void do_user_addr_fault(struct pt_regs *regs, goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index a68f2dda0948..70b91de2e053 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -32,6 +32,7 @@ #include <asm/msr.h> #include <asm/cmdline.h> #include <asm/sev.h> +#include <asm/ia32.h> #include "mm_internal.h" @@ -481,6 +482,16 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ES_ENABLED) x86_cpuinit.parallel_bringup = false; + + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + if (sev_status & MSR_AMD64_SEV_ENABLED) + ia32_disable(); } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c10d9abc239..e89e415aa743 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp #endif WARN(1, "verification of programs using bpf_throw should have failed\n"); } + +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + u8 *old_addr, *new_addr, *old_bypass_addr; + int ret; + + old_bypass_addr = old ? NULL : poke->bypass_addr; + old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; + new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; + + /* + * On program loading or teardown, the program's kallsym entry + * might not be in place, so we use __bpf_arch_text_poke to skip + * the kallsyms check. + */ + if (new) { + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, new_addr); + BUG_ON(ret < 0); + if (!old) { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + poke->bypass_addr, + NULL); + BUG_ON(ret < 0); + } + } else { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + old_bypass_addr, + poke->bypass_addr); + BUG_ON(ret < 0); + /* let other CPUs finish the execution of program + * so that it will not possible to expose them + * to invalid nop, stack unwind, nop state + */ + if (!ret) + synchronize_rcu(); + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, NULL); + BUG_ON(ret < 0); + } +} diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index d30949e25ebd..a3bae2b24626 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -66,7 +66,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { [S_REL] = "^(__init_(begin|end)|" "__x86_cpu_dev_(start|end)|" - "(__parainstructions|__alt_instructions)(_end)?|" + "__alt_instructions(_end)?|" "(__iommu_table|__apicdrivers|__smp_locks)(_end)?|" "__(start|end)_pci_.*|" #if CONFIG_FW_LOADER diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 9b1ec5d8c99c..a65fc2ae15b4 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -9,6 +9,7 @@ config XEN select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR depends on X86_64 || (X86_32 && X86_PAE) + depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8) depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0337392a3121..3c61bb98c10e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,9 +33,12 @@ EXPORT_SYMBOL_GPL(hypercall_page); * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. + * Make sure that xen_vcpu_info doesn't cross a page boundary by making it + * cache-line aligned (the struct is guaranteed to have a size of 64 bytes, + * which matches the cache line size of 64-bit x86 processors). */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ DEFINE_PER_CPU(uint32_t, xen_vcpu_id); @@ -160,6 +163,7 @@ void xen_vcpu_setup(int cpu) int err; struct vcpu_info *vcpup; + BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES); BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bbbfdd495ebd..aeb33e0a3f76 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION - { entry_INT80_compat, xen_entry_INT80_compat, false }, + TRAP_ENTRY(int80_emulation, false ), #endif TRAP_ENTRY(exc_page_fault, false ), TRAP_ENTRY(exc_divide_error, false ), diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 6092fea7d651..39982f955cfe 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -45,7 +45,7 @@ static const typeof(pv_ops) xen_irq_ops __initconst = { /* Initial interrupt flag handling only called while interrupts off. */ .save_fl = __PV_IS_CALLEE_SAVE(paravirt_ret0), .irq_disable = __PV_IS_CALLEE_SAVE(paravirt_nop), - .irq_enable = __PV_IS_CALLEE_SAVE(paravirt_BUG), + .irq_enable = __PV_IS_CALLEE_SAVE(BUG_func), .safe_halt = xen_safe_halt, .halt = xen_halt, diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 9e5e68008785..1a9cd18dfbd3 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat +xen_pv_trap asm_int80_emulation #endif xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 408a2aa66c69..a87ab36889e7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -21,7 +21,7 @@ extern void *xen_initial_gdt; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DECLARE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 06eefa9c1458..497b5d32f457 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -427,3 +427,5 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount |