diff options
Diffstat (limited to 'arch')
486 files changed, 8088 insertions, 6657 deletions
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 2676017f42f1..44e7aedac6e8 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -327,7 +327,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index b6c862dff1f6..187cd8df2faf 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the linux kernel. # -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds asflags-y := $(KBUILD_CFLAGS) ccflags-y := -Wno-sign-compare diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index a89ce84371f9..d19e51ec711d 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -69,7 +69,8 @@ srmcons_do_receive_chars(struct tty_port *port) static void srmcons_receive_chars(struct timer_list *t) { - struct srmcons_private *srmconsp = from_timer(srmconsp, t, timer); + struct srmcons_private *srmconsp = timer_container_of(srmconsp, t, + timer); struct tty_port *port = &srmconsp->port; unsigned long flags; int incr = 10; diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 005d9e4d187a..a31bbf5c8bbc 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -144,7 +144,7 @@ #define ARC_AUX_AGU_MOD2 0x5E2 #define ARC_AUX_AGU_MOD3 0x5E3 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <soc/arc/arc_aux.h> diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 592d7fffc223..e615c42b93ba 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_ATOMIC_H #define _ASM_ARC_ATOMIC_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/compiler.h> @@ -31,6 +31,6 @@ #include <asm/atomic64-arcv2.h> #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 9b5791b85471..73080a664369 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -137,12 +137,9 @@ ATOMIC64_OPS(xor, xor, xor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline s64 -arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) +static inline u64 __arch_cmpxchg64_relaxed(volatile void *ptr, u64 old, u64 new) { - s64 prev; - - smp_mb(); + u64 prev; __asm__ __volatile__( "1: llockd %0, [%1] \n" @@ -152,14 +149,12 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) " bnz 1b \n" "2: \n" : "=&r"(prev) - : "r"(ptr), "ir"(expected), "r"(new) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); + : "r"(ptr), "ir"(old), "r"(new) + : "memory", "cc"); return prev; } -#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg +#define arch_cmpxchg64_relaxed __arch_cmpxchg64_relaxed static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) { diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index f5a936496f06..5340c2871392 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -10,7 +10,7 @@ #error only <linux/bitops.h> can be included directly #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/compiler.h> @@ -192,6 +192,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic-setbit.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index 4c453ba96c51..171c16021f70 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_BUG_H #define _ASM_ARC_BUG_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> @@ -29,6 +29,6 @@ void die(const char *str, struct pt_regs *regs, unsigned long address); #include <asm-generic/bug.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index f0f1fc5d62b6..040a97f4dd82 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -23,7 +23,7 @@ */ #define ARC_UNCACHED_ADDR_SPACE 0xc0000000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/build_bug.h> @@ -65,7 +65,7 @@ extern int ioc_enable; extern unsigned long perip_base, perip_end; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Instruction cache related Auxiliary registers */ #define ARC_REG_IC_BCR 0x77 /* Build Config reg */ diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h index 06be89f6f2f0..03ffd005f3fa 100644 --- a/arch/arc/include/asm/current.h +++ b/arch/arc/include/asm/current.h @@ -9,7 +9,7 @@ #ifndef _ASM_ARC_CURRENT_H #define _ASM_ARC_CURRENT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_ARC_CURR_IN_REG @@ -20,6 +20,6 @@ register struct task_struct *curr_arc asm("gp"); #include <asm-generic/current.h> #endif /* ! CONFIG_ARC_CURR_IN_REG */ -#endif /* ! __ASSEMBLY__ */ +#endif /* ! __ASSEMBLER__ */ #endif /* _ASM_ARC_CURRENT_H */ diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h index cd5636dfeb6f..fd5fdaad90c1 100644 --- a/arch/arc/include/asm/dsp-impl.h +++ b/arch/arc/include/asm/dsp-impl.h @@ -11,7 +11,7 @@ #define DSP_CTRL_DISABLED_ALL 0 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* clobbers r5 register */ .macro DSP_EARLY_INIT diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h index f496dbc4640b..eeaaf4e4eabd 100644 --- a/arch/arc/include/asm/dsp.h +++ b/arch/arc/include/asm/dsp.h @@ -7,7 +7,7 @@ #ifndef __ASM_ARC_DSP_H #define __ASM_ARC_DSP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * DSP-related saved registers - need to be saved only when you are @@ -24,6 +24,6 @@ struct dsp_callee_regs { #endif }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_ARC_DSP_H */ diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h index a0d5ebe1bc3f..1524c5cf8b59 100644 --- a/arch/arc/include/asm/dwarf.h +++ b/arch/arc/include/asm/dwarf.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_DWARF_H #define _ASM_ARC_DWARF_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef ARC_DW2_UNWIND_AS_CFI @@ -38,6 +38,6 @@ #endif /* !ARC_DW2_UNWIND_AS_CFI */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ARC_DWARF_H */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 38c35722cebf..f453af251a1a 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -13,7 +13,7 @@ #include <asm/processor.h> /* For VMALLOC_START */ #include <asm/mmu.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef CONFIG_ISA_ARCOMPACT #include <asm/entry-compact.h> /* ISA specific bits */ @@ -146,7 +146,7 @@ #endif /* CONFIG_ARC_CURR_IN_REG */ -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ extern void do_signal(struct pt_regs *); extern void do_notify_resume(struct pt_regs *); diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index fb3c21f1a238..30aea562f8aa 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -50,7 +50,7 @@ #define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | __AD_ENB | \ (ARCV2_IRQ_DEF_PRIO << 1)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Save IRQ state and disable IRQs @@ -170,6 +170,6 @@ static inline void arc_softirq_clear(int irq) seti .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 936a2f21f315..85c2f6bcde0c 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -40,7 +40,7 @@ #define ISA_INIT_STATUS_BITS STATUS_IE_MASK -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /****************************************************************** * IRQ Control Macros @@ -196,6 +196,6 @@ static inline int arch_irqs_disabled(void) flag \scratch .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index a339223d9e05..66ead75784d9 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -2,7 +2,7 @@ #ifndef _ASM_ARC_JUMP_LABEL_H #define _ASM_ARC_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/stringify.h> #include <linux/types.h> @@ -68,5 +68,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index 8a3fb71e9cfa..ba3cb65b5eaa 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -12,7 +12,7 @@ #define __ALIGN .align 4 #define __ALIGN_STR __stringify(__ALIGN) -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro ST2 e, o, off #ifdef CONFIG_ARC_HAS_LL64 @@ -61,7 +61,7 @@ CFI_ENDPROC ASM_NL \ .size name, .-name -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #ifdef CONFIG_ARC_HAS_ICCM #define __arcfp_code __section(".text.arcfp") @@ -75,6 +75,6 @@ #define __arcfp_data __section(".data") #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h index 41412642f279..5e5482026ac9 100644 --- a/arch/arc/include/asm/mmu-arcv2.h +++ b/arch/arc/include/asm/mmu-arcv2.h @@ -69,7 +69,7 @@ #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct mm_struct; extern int pae40_exist_but_not_enab(void); @@ -100,6 +100,6 @@ static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) sr \reg, [ARC_REG_PID] .endm -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 4ae2db59d494..e3b35ceab582 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_MMU_H #define _ASM_ARC_MMU_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/threads.h> /* NR_CPUS */ diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index def0dfb95b43..9720fe6b2c24 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -19,7 +19,7 @@ #endif /* CONFIG_ARC_HAS_PAE40 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) @@ -136,6 +136,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #include <asm-generic/memory_model.h> /* page_to_pfn, pfn_to_page */ #include <asm-generic/getorder.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index 8ebec1b21d24..4630c5acca05 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -75,7 +75,7 @@ * This is to enable COW mechanism */ /* xwr */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) #define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) @@ -130,7 +130,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } @@ -142,6 +142,6 @@ PTE_BIT_FUNC(swp_clear_exclusive, &= ~(_PAGE_SWP_EXCLUSIVE)); #include <asm/hugepage.h> #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index d1ce4b0f1071..c8f9273372c0 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -85,7 +85,7 @@ #define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if CONFIG_PGTABLE_LEVELS > 3 #include <asm-generic/pgtable-nop4d.h> @@ -181,6 +181,6 @@ #define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 4cf45a99fd79..bd580e2b62d7 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -19,7 +19,7 @@ */ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern char empty_zero_page[PAGE_SIZE]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) @@ -29,6 +29,6 @@ extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); /* to cope with aliasing VIPT cache */ #define HAVE_ARCH_UNMAPPED_AREA -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d606658e2fe7..7f7901ac6643 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -11,7 +11,7 @@ #ifndef __ASM_ARC_PROCESSOR_H #define __ASM_ARC_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> #include <asm/dsp.h> @@ -66,7 +66,7 @@ extern void start_thread(struct pt_regs * regs, unsigned long pc, extern unsigned int __get_wchan(struct task_struct *p); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Default System Memory Map on ARC diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index cf79df0b2570..f6c052af8f4d 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -10,7 +10,7 @@ #include <uapi/asm/ptrace.h> #include <linux/compiler.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union { struct { @@ -172,6 +172,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, extern int syscall_trace_enter(struct pt_regs *); extern void syscall_trace_exit(struct pt_regs *); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_PTRACE_H */ diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h index 1f85de8288b1..5806106a65f9 100644 --- a/arch/arc/include/asm/switch_to.h +++ b/arch/arc/include/asm/switch_to.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_SWITCH_TO_H #define _ASM_ARC_SWITCH_TO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/sched.h> #include <asm/dsp-impl.h> diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 12daaf3a61ea..255d2c774219 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -24,7 +24,7 @@ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/thread_info.h> @@ -62,7 +62,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) return (struct thread_info *)(sp & ~(THREAD_SIZE - 1)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * thread information flags diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 2a6eff57f6dd..3ae832db278c 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -14,7 +14,7 @@ #define PTRACE_GET_THREAD_AREA 25 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Userspace ABI: Register state needed by * -ptrace (gdbserver) @@ -53,6 +53,6 @@ struct user_regs_arcv2 { unsigned long r30, r58, r59; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _UAPI__ASM_ARC_PTRACE_H */ diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 95fbf9364c67..fa94fff02419 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -26,4 +26,4 @@ ifdef CONFIG_ISA_ARCOMPACT CFLAGS_fpu.o += -mdpfp endif -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index d8969dab12d4..789cfb9ea14e 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -241,15 +241,6 @@ static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) return (e1->start > e2->start) - (e1->start < e2->start); } -static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) -{ - struct eh_frame_hdr_table_entry *e1 = p1; - struct eh_frame_hdr_table_entry *e2 = p2; - - swap(e1->start, e2->start); - swap(e1->fde, e2->fde); -} - static void init_unwind_hdr(struct unwind_table *table, void *(*alloc) (unsigned long)) { @@ -345,7 +336,7 @@ static void init_unwind_hdr(struct unwind_table *table, sort(header->table, n, sizeof(*header->table), - cmp_eh_frame_hdr_table_entries, swap_eh_frame_hdr_table_entries); + cmp_eh_frame_hdr_table_entries, NULL); table->hdrsz = hdrSize; smp_wmb(); diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index e81964cce516..f71af368674c 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -167,7 +167,7 @@ CONFIG_MFD_MAX77686=y CONFIG_MFD_MAX77693=y CONFIG_MFD_MAX8997=y CONFIG_MFD_MAX8998=y -CONFIG_MFD_SEC_CORE=y +CONFIG_MFD_SEC_I2C=y CONFIG_MFD_STMPE=y CONFIG_STMPE_I2C=y CONFIG_MFD_TPS65090=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index aca01ad6aafc..50c170b4619f 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -612,7 +612,7 @@ CONFIG_MFD_QCOM_RPM=y CONFIG_MFD_SPMI_PMIC=y CONFIG_MFD_RK8XX_I2C=y CONFIG_MFD_RN5T618=y -CONFIG_MFD_SEC_CORE=y +CONFIG_MFD_SEC_I2C=y CONFIG_MFD_STMPE=y CONFIG_MFD_PALMAS=y CONFIG_MFD_TPS65090=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index ded4b9a5accf..ff29c5b0e9c9 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -335,7 +335,7 @@ CONFIG_MFD_MAX77693=y CONFIG_MFD_MAX8907=m CONFIG_EZX_PCAP=y CONFIG_UCB1400_CORE=m -CONFIG_MFD_SEC_CORE=y +CONFIG_MFD_SEC_I2C=y CONFIG_MFD_PALMAS=y CONFIG_MFD_TPS65090=y CONFIG_MFD_TPS6586X=y diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 7f1c3b4e3e04..86378eec7757 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -301,7 +301,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) __pte((swp).val) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_isset(pte, L_PTE_SWP_EXCLUSIVE); } diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h index d37559762180..be08a8da046f 100644 --- a/arch/arm/include/asm/simd.h +++ b/arch/arm/include/asm/simd.h @@ -8,7 +8,8 @@ static __must_check inline bool may_use_simd(void) { - return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq(); + return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq() + && !irqs_disabled(); } #endif /* _ASM_SIMD_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index b3333d070390..afc9de7ef9a1 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -104,4 +104,4 @@ obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 748698e91a4b..27e64f782cb3 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -515,7 +515,5 @@ void __init early_ioremap_init(void) bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, unsigned long flags) { - unsigned long pfn = PHYS_PFN(offset); - - return memblock_is_map_memory(pfn); + return memblock_is_map_memory(offset); } diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 7803d50b90f8..e559ad3cd148 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -877,6 +877,7 @@ void kernel_neon_begin(void) * the kernel mode NEON register contents never need to be preserved. */ BUG_ON(in_hardirq()); + BUG_ON(irqs_disabled()); cpu = __smp_processor_id(); fpexc = fmrx(FPEXC) | FPEXC_EN; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..393d71124f5d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -256,6 +256,7 @@ config ARM64 select HOTPLUG_SMT if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING + select JUMP_LABEL select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MODULES_USE_ELF_RELA diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 8b76821f190f..a541bb029aa4 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -269,7 +269,7 @@ config ARCH_QCOM bool "Qualcomm Platforms" select GPIOLIB select PINCTRL - select HAVE_PWRCTL if PCI + select HAVE_PWRCTRL if PCI help This enables support for the ARMv8 based Qualcomm chipsets. diff --git a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi index 3df2fd3993b5..9740fbf200f0 100644 --- a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi +++ b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi @@ -20,8 +20,6 @@ compatible = "jedec,spi-nor"; reg = <0x0>; spi-max-frequency = <25000000>; - #address-cells = <1>; - #size-cells = <1>; partitions { compatible = "fixed-partitions"; diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts index e2d9439397f7..5b3c42e9f0e6 100644 --- a/arch/arm64/boot/dts/apple/t8103-j293.dts +++ b/arch/arm64/boot/dts/apple/t8103-j293.dts @@ -100,6 +100,8 @@ &displaydfr_mipi { status = "okay"; + #address-cells = <1>; + #size-cells = <0>; dfr_panel: panel@0 { compatible = "apple,j293-summit", "apple,summit"; diff --git a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi index 8e82231acab5..0c8206156bfe 100644 --- a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi +++ b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi @@ -71,7 +71,7 @@ */ &port00 { bus-range = <1 1>; - wifi0: network@0,0 { + wifi0: wifi@0,0 { compatible = "pci14e4,4425"; reg = <0x10000 0x0 0x0 0x0 0x0>; /* To be filled by the loader */ diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 20faf0c0d809..3a204845b85b 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -405,8 +405,6 @@ compatible = "apple,t8103-display-pipe-mipi", "apple,h7-display-pipe-mipi"; reg = <0x2 0x28600000 0x0 0x100000>; power-domains = <&ps_mipi_dsi>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts index be86d34c6696..fb8ad7d4c65a 100644 --- a/arch/arm64/boot/dts/apple/t8112-j493.dts +++ b/arch/arm64/boot/dts/apple/t8112-j493.dts @@ -63,6 +63,8 @@ &displaydfr_mipi { status = "okay"; + #address-cells = <1>; + #size-cells = <0>; dfr_panel: panel@0 { compatible = "apple,j493-summit", "apple,summit"; diff --git a/arch/arm64/boot/dts/apple/t8112.dtsi b/arch/arm64/boot/dts/apple/t8112.dtsi index e95711d8337f..f68354194355 100644 --- a/arch/arm64/boot/dts/apple/t8112.dtsi +++ b/arch/arm64/boot/dts/apple/t8112.dtsi @@ -420,8 +420,6 @@ compatible = "apple,t8112-display-pipe-mipi", "apple,h7-display-pipe-mipi"; reg = <0x2 0x28600000 0x0 0x100000>; power-domains = <&ps_mipi_dsi>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 0baf256b4400..983b2f0e8797 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -687,11 +687,12 @@ }; wdog0: watchdog@2ad0000 { - compatible = "fsl,imx21-wdt"; + compatible = "fsl,ls1046a-wdt", "fsl,imx21-wdt"; reg = <0x0 0x2ad0000 0x0 0x10000>; interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(2)>; + big-endian; }; edma0: dma-controller@2c00000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index d29710772569..1594ce9182a5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -464,6 +464,7 @@ }; reg_nvcc_sd: LDO5 { + regulator-always-on; regulator-max-microvolt = <3300000>; regulator-min-microvolt = <1800000>; regulator-name = "On-module +V3.3_1.8_SD (LDO5)"; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi index 2f740d74707b..4bf818873fe3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi @@ -70,7 +70,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi index 5ab3ffe9931d..cf747ec6fa16 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi @@ -110,7 +110,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index e2b5e7ac3e46..5eb114d2360a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -122,7 +122,7 @@ tpm@1 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts index 6daa2313f879..568d24265ddf 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts @@ -201,7 +201,7 @@ tpm@0 { compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x0>; - spi-max-frequency = <36000000>; + spi-max-frequency = <25000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts index 6c47f4b47356..9f4d0899a94d 100644 --- a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts @@ -574,17 +574,17 @@ &scmi_iomuxc { pinctrl_emdio: emdiogrp { fsl,pins = < - IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x57e - IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e + IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x50e + IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e >; }; pinctrl_enetc0: enetc0grp { fsl,pins = < - IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e - IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e - IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e - IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e + IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e + IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e + IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e + IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e @@ -598,10 +598,10 @@ pinctrl_enetc1: enetc1grp { fsl,pins = < - IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x57e - IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x57e - IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x57e - IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x57e + IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x50e + IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x50e + IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x50e + IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x50e IMX95_PAD_ENET2_TX_CTL__NETCMIX_TOP_ETH1_RGMII_TX_CTL 0x57e IMX95_PAD_ENET2_TXC__NETCMIX_TOP_ETH1_RGMII_TX_CLK 0x58e IMX95_PAD_ENET2_RX_CTL__NETCMIX_TOP_ETH1_RGMII_RX_CTL 0x57e diff --git a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts index 6886ea766655..d7d845231312 100644 --- a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts @@ -566,17 +566,17 @@ &scmi_iomuxc { pinctrl_emdio: emdiogrp{ fsl,pins = < - IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x57e - IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e + IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x50e + IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e >; }; pinctrl_enetc0: enetc0grp { fsl,pins = < - IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e - IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e - IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e - IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e + IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e + IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e + IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e + IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 632631a29112..5aecdd9b62ff 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1708,7 +1708,7 @@ <0x9 0 1 0>; reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space"; num-lanes = <1>; - interrupts = <GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "dma"; clocks = <&scmi_clk IMX95_CLK_HSIO>, <&scmi_clk IMX95_CLK_HSIOPLL>, diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index ae7a275fd223..cefecb7a23cf 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -1090,6 +1090,8 @@ }; &pmk8280_rtc { + qcom,uefi-rtc-info; + status = "okay"; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi index c02fd4d15c96..e3888bc143a0 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi @@ -224,6 +224,7 @@ reg-names = "rtc", "alarm"; interrupts = <0x0 0x62 0x1 IRQ_TYPE_EDGE_RISING>; qcom,no-alarm; /* alarm owned by ADSP */ + qcom,uefi-rtc-info; }; pmk8550_sdam_2: nvram@7100 { diff --git a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi index 18ab5639b301..0f3501951409 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi @@ -280,6 +280,171 @@ resets = <&cpg 0x30>; }; + dmac0: dma-controller@11400000 { + compatible = "renesas,r9a09g057-dmac"; + reg = <0 0x11400000 0 0x10000>; + interrupts = <GIC_SPI 499 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 89 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 91 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 92 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 94 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 95 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 96 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 97 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 98 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 99 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 100 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 101 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 102 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 103 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 104 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "error", + "ch0", "ch1", "ch2", "ch3", + "ch4", "ch5", "ch6", "ch7", + "ch8", "ch9", "ch10", "ch11", + "ch12", "ch13", "ch14", "ch15"; + clocks = <&cpg CPG_MOD 0x0>; + power-domains = <&cpg>; + resets = <&cpg 0x31>; + #dma-cells = <1>; + dma-channels = <16>; + renesas,icu = <&icu 4>; + }; + + dmac1: dma-controller@14830000 { + compatible = "renesas,r9a09g057-dmac"; + reg = <0 0x14830000 0 0x10000>; + interrupts = <GIC_SPI 495 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 27 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 28 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 29 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 30 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 31 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 32 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 33 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 34 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 37 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 39 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 40 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "error", + "ch0", "ch1", "ch2", "ch3", + "ch4", "ch5", "ch6", "ch7", + "ch8", "ch9", "ch10", "ch11", + "ch12", "ch13", "ch14", "ch15"; + clocks = <&cpg CPG_MOD 0x1>; + power-domains = <&cpg>; + resets = <&cpg 0x32>; + #dma-cells = <1>; + dma-channels = <16>; + renesas,icu = <&icu 0>; + }; + + dmac2: dma-controller@14840000 { + compatible = "renesas,r9a09g057-dmac"; + reg = <0 0x14840000 0 0x10000>; + interrupts = <GIC_SPI 496 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 41 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 42 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 43 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 44 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 45 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 46 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 47 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 48 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 49 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 50 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 51 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 52 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 53 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 54 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 55 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 56 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "error", + "ch0", "ch1", "ch2", "ch3", + "ch4", "ch5", "ch6", "ch7", + "ch8", "ch9", "ch10", "ch11", + "ch12", "ch13", "ch14", "ch15"; + clocks = <&cpg CPG_MOD 0x2>; + power-domains = <&cpg>; + resets = <&cpg 0x33>; + #dma-cells = <1>; + dma-channels = <16>; + renesas,icu = <&icu 1>; + }; + + dmac3: dma-controller@12000000 { + compatible = "renesas,r9a09g057-dmac"; + reg = <0 0x12000000 0 0x10000>; + interrupts = <GIC_SPI 497 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 57 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 58 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 59 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 60 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 61 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 62 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 63 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 64 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 65 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 66 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 67 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 68 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 69 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 70 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 71 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 72 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "error", + "ch0", "ch1", "ch2", "ch3", + "ch4", "ch5", "ch6", "ch7", + "ch8", "ch9", "ch10", "ch11", + "ch12", "ch13", "ch14", "ch15"; + clocks = <&cpg CPG_MOD 0x3>; + power-domains = <&cpg>; + resets = <&cpg 0x34>; + #dma-cells = <1>; + dma-channels = <16>; + renesas,icu = <&icu 2>; + }; + + dmac4: dma-controller@12010000 { + compatible = "renesas,r9a09g057-dmac"; + reg = <0 0x12010000 0 0x10000>; + interrupts = <GIC_SPI 498 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 73 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 76 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 78 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 80 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 81 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 82 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 83 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 84 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 85 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 86 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 87 IRQ_TYPE_EDGE_RISING>, + <GIC_SPI 88 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "error", + "ch0", "ch1", "ch2", "ch3", + "ch4", "ch5", "ch6", "ch7", + "ch8", "ch9", "ch10", "ch11", + "ch12", "ch13", "ch14", "ch15"; + clocks = <&cpg CPG_MOD 0x4>; + power-domains = <&cpg>; + resets = <&cpg 0x35>; + #dma-cells = <1>; + dma-channels = <16>; + renesas,icu = <&icu 3>; + }; + ostm0: timer@11800000 { compatible = "renesas,r9a09g057-ostm", "renesas,ostm"; reg = <0x0 0x11800000 0x0 0x1000>; diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi index ab232e5c7ad6..4203b335a263 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi @@ -379,6 +379,18 @@ <0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_up>; }; }; + + spi1 { + spi1_csn0_gpio_pin: spi1-csn0-gpio-pin { + rockchip,pins = + <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up_4ma>; + }; + + spi1_csn1_gpio_pin: spi1-csn1-gpio-pin { + rockchip,pins = + <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up_4ma>; + }; + }; }; &pmu_io_domains { @@ -396,6 +408,17 @@ vqmmc-supply = <&vccio_sd>; }; +&spi1 { + /* + * Hardware CS has a very slow rise time of about 6us, + * causing transmission errors. + * With cs-gpios we have a rise time of about 20ns. + */ + cs-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>, <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&spi1_clk &spi1_csn0_gpio_pin &spi1_csn1_gpio_pin &spi1_miso &spi1_mosi>; +}; + &tsadc { status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 3c127c5c2607..a9021c524afb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -30,6 +30,7 @@ fan: gpio_fan { compatible = "gpio-fan"; + fan-supply = <&vcc12v_dcin>; gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>; gpio-fan,speed-map = < 0 0>, diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts index b09e789c75c4..801b40fea4e8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts @@ -211,10 +211,38 @@ status = "okay"; }; +&cpu_b0 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b1 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b2 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + +&cpu_b3 { + cpu-supply = <&vdd_cpu_big_s0>; +}; + &cpu_l0 { cpu-supply = <&vdd_cpu_lit_s0>; }; +&cpu_l1 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + +&cpu_l2 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + +&cpu_l3 { + cpu-supply = <&vdd_cpu_lit_s0>; +}; + &gmac0 { phy-mode = "rgmii-id"; clock_in_out = "output"; diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi index 1086482f0479..64812e3bcb61 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi @@ -615,7 +615,7 @@ <0 0 0 2 &pcie1_intc 1>, <0 0 0 3 &pcie1_intc 2>, <0 0 0 4 &pcie1_intc 3>; - linux,pci-domain = <0>; + linux,pci-domain = <1>; max-link-speed = <2>; num-ib-windows = <8>; num-viewport = <8>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi index 7f874c77410c..6584d73660f6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi @@ -578,14 +578,14 @@ hdmim0_tx0_scl: hdmim0-tx0-scl { rockchip,pins = /* hdmim0_tx0_scl */ - <4 RK_PB7 5 &pcfg_pull_none>; + <4 RK_PB7 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim0_tx0_sda: hdmim0-tx0-sda { rockchip,pins = /* hdmim0_tx0_sda */ - <4 RK_PC0 5 &pcfg_pull_none>; + <4 RK_PC0 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -640,14 +640,14 @@ hdmim1_tx0_scl: hdmim1-tx0-scl { rockchip,pins = /* hdmim1_tx0_scl */ - <0 RK_PD5 11 &pcfg_pull_none>; + <0 RK_PD5 11 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim1_tx0_sda: hdmim1-tx0-sda { rockchip,pins = /* hdmim1_tx0_sda */ - <0 RK_PD4 11 &pcfg_pull_none>; + <0 RK_PD4 11 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -668,14 +668,14 @@ hdmim1_tx1_scl: hdmim1-tx1-scl { rockchip,pins = /* hdmim1_tx1_scl */ - <3 RK_PC6 5 &pcfg_pull_none>; + <3 RK_PC6 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim1_tx1_sda: hdmim1-tx1-sda { rockchip,pins = /* hdmim1_tx1_sda */ - <3 RK_PC5 5 &pcfg_pull_none>; + <3 RK_PC5 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ hdmim2_rx_cec: hdmim2-rx-cec { @@ -709,14 +709,14 @@ hdmim2_tx0_scl: hdmim2-tx0-scl { rockchip,pins = /* hdmim2_tx0_scl */ - <3 RK_PC7 5 &pcfg_pull_none>; + <3 RK_PC7 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim2_tx0_sda: hdmim2-tx0-sda { rockchip,pins = /* hdmim2_tx0_sda */ - <3 RK_PD0 5 &pcfg_pull_none>; + <3 RK_PD0 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ @@ -730,14 +730,14 @@ hdmim2_tx1_scl: hdmim2-tx1-scl { rockchip,pins = /* hdmim2_tx1_scl */ - <1 RK_PA4 5 &pcfg_pull_none>; + <1 RK_PA4 5 &pcfg_pull_none_drv_level_5_smt>; }; /omit-if-no-ref/ hdmim2_tx1_sda: hdmim2-tx1-sda { rockchip,pins = /* hdmim2_tx1_sda */ - <1 RK_PA3 5 &pcfg_pull_none>; + <1 RK_PA3 5 &pcfg_pull_none_drv_level_1_smt>; }; /omit-if-no-ref/ diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi index cc37f082adea..b07543315f87 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi @@ -321,6 +321,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi index 244c66faa161..fb48ddc04bcb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi @@ -160,14 +160,15 @@ hdmim0_tx1_scl: hdmim0-tx1-scl { rockchip,pins = /* hdmim0_tx1_scl */ - <2 RK_PB5 4 &pcfg_pull_none>; + <2 RK_PB5 4 &pcfg_pull_none_drv_level_3_smt>; }; /omit-if-no-ref/ hdmim0_tx1_sda: hdmim0-tx1-sda { rockchip,pins = /* hdmim0_tx1_sda */ - <2 RK_PB4 4 &pcfg_pull_none>; + <2 RK_PB4 4 &pcfg_pull_none_drv_level_1_smt>; + }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts index 8b717c4017a4..b2947b36fada 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts @@ -474,6 +474,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi index 5c645437b507..b0475b7c655a 100644 --- a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi +++ b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi @@ -333,6 +333,41 @@ }; /omit-if-no-ref/ + pcfg_pull_none_drv_level_1_smt: pcfg-pull-none-drv-level-1-smt { + bias-disable; + drive-strength = <1>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_2_smt: pcfg-pull-none-drv-level-2-smt { + bias-disable; + drive-strength = <2>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_3_smt: pcfg-pull-none-drv-level-3-smt { + bias-disable; + drive-strength = <3>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_4_smt: pcfg-pull-none-drv-level-4-smt { + bias-disable; + drive-strength = <4>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ + pcfg_pull_none_drv_level_5_smt: pcfg-pull-none-drv-level-5-smt { + bias-disable; + drive-strength = <5>; + input-schmitt-enable; + }; + + /omit-if-no-ref/ pcfg_output_high: pcfg-output-high { output-high; }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 1e99db100607..eb5c17d4c7ec 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -774,7 +774,7 @@ CONFIG_MFD_MT6397=y CONFIG_MFD_SPMI_PMIC=y CONFIG_MFD_RK8XX_I2C=y CONFIG_MFD_RK8XX_SPI=y -CONFIG_MFD_SEC_CORE=y +CONFIG_MFD_SEC_I2C=y CONFIG_MFD_SL28CPLD=y CONFIG_RZ_MTU3=y CONFIG_MFD_TI_AM335X_TSCADC=m @@ -1444,6 +1444,7 @@ CONFIG_PLATFORM_MHU=y CONFIG_BCM2835_MBOX=y CONFIG_QCOM_APCS_IPC=y CONFIG_MTK_ADSP_MBOX=m +CONFIG_QCOM_CPUCP_MBOX=m CONFIG_QCOM_IPCC=y CONFIG_ROCKCHIP_IOMMU=y CONFIG_TEGRA_IOMMU_SMMU=y @@ -1573,6 +1574,7 @@ CONFIG_RESET_QCOM_AOSS=y CONFIG_RESET_QCOM_PDC=m CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_RESET_TI_SCI=y +CONFIG_PHY_SNPS_EUSB2=m CONFIG_PHY_XGENE=y CONFIG_PHY_CAN_TRANSCEIVER=m CONFIG_PHY_NXP_PTN3222=m @@ -1597,7 +1599,6 @@ CONFIG_PHY_QCOM_EDP=m CONFIG_PHY_QCOM_PCIE2=m CONFIG_PHY_QCOM_QMP=m CONFIG_PHY_QCOM_QUSB2=m -CONFIG_PHY_QCOM_SNPS_EUSB2=m CONFIG_PHY_QCOM_EUSB2_REPEATER=m CONFIG_PHY_QCOM_M31_USB=m CONFIG_PHY_QCOM_USB_HS=m diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 1e7c7475e43f..9f38340d24c2 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -287,30 +287,6 @@ .Lskip_fgt2_\@: .endm -.macro __init_el2_gcs - mrs_s x1, SYS_ID_AA64PFR1_EL1 - ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 - cbz x1, .Lskip_gcs_\@ - - /* Ensure GCS is not enabled when we start trying to do BLs */ - msr_s SYS_GCSCR_EL1, xzr - msr_s SYS_GCSCRE0_EL1, xzr -.Lskip_gcs_\@: -.endm - -.macro __init_el2_mpam - /* Memory Partitioning And Monitoring: disable EL2 traps */ - mrs x1, id_aa64pfr0_el1 - ubfx x0, x1, #ID_AA64PFR0_EL1_MPAM_SHIFT, #4 - cbz x0, .Lskip_mpam_\@ // skip if no MPAM - msr_s SYS_MPAM2_EL2, xzr // use the default partition - // and disable lower traps - mrs_s x0, SYS_MPAMIDR_EL1 - tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg - msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 -.Lskip_mpam_\@: -.endm - /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. Note that everything gets initialised as @@ -328,12 +304,10 @@ __init_el2_stage2 __init_el2_gicv3 __init_el2_hstr - __init_el2_mpam __init_el2_nvhe_idregs __init_el2_cptr __init_el2_fgt __init_el2_fgt2 - __init_el2_gcs .endm #ifndef __KVM_NVHE_HYPERVISOR__ @@ -375,6 +349,23 @@ #endif .macro finalise_el2_state + check_override id_aa64pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT, .Linit_mpam_\@, .Lskip_mpam_\@, x1, x2 + +.Linit_mpam_\@: + msr_s SYS_MPAM2_EL2, xzr // use the default partition + // and disable lower traps + mrs_s x0, SYS_MPAMIDR_EL1 + tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg + msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 + +.Lskip_mpam_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2 + +.Linit_gcs_\@: + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr + +.Lskip_gcs_\@: check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 .Linit_sve_\@: /* SVE register access */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 9e93733523f6..74a4f738c5f5 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -58,7 +58,7 @@ #define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \ + EARLY_SEGMENT_EXTRA_PAGES)) -#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1)) +#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, kimage_limit, 1)) #define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE) #define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index bd020fc28aa9..0720898f563e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -561,68 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu_set_flag((v), e); \ } while (0) -#define __build_check_all_or_none(r, bits) \ - BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits)) - -#define __cpacr_to_cptr_clr(clr, set) \ - ({ \ - u64 cptr = 0; \ - \ - if ((set) & CPACR_EL1_FPEN) \ - cptr |= CPTR_EL2_TFP; \ - if ((set) & CPACR_EL1_ZEN) \ - cptr |= CPTR_EL2_TZ; \ - if ((set) & CPACR_EL1_SMEN) \ - cptr |= CPTR_EL2_TSM; \ - if ((clr) & CPACR_EL1_TTA) \ - cptr |= CPTR_EL2_TTA; \ - if ((clr) & CPTR_EL2_TAM) \ - cptr |= CPTR_EL2_TAM; \ - if ((clr) & CPTR_EL2_TCPAC) \ - cptr |= CPTR_EL2_TCPAC; \ - \ - cptr; \ - }) - -#define __cpacr_to_cptr_set(clr, set) \ - ({ \ - u64 cptr = 0; \ - \ - if ((clr) & CPACR_EL1_FPEN) \ - cptr |= CPTR_EL2_TFP; \ - if ((clr) & CPACR_EL1_ZEN) \ - cptr |= CPTR_EL2_TZ; \ - if ((clr) & CPACR_EL1_SMEN) \ - cptr |= CPTR_EL2_TSM; \ - if ((set) & CPACR_EL1_TTA) \ - cptr |= CPTR_EL2_TTA; \ - if ((set) & CPTR_EL2_TAM) \ - cptr |= CPTR_EL2_TAM; \ - if ((set) & CPTR_EL2_TCPAC) \ - cptr |= CPTR_EL2_TCPAC; \ - \ - cptr; \ - }) - -#define cpacr_clear_set(clr, set) \ - do { \ - BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \ - BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \ - __build_check_all_or_none((clr), CPACR_EL1_FPEN); \ - __build_check_all_or_none((set), CPACR_EL1_FPEN); \ - __build_check_all_or_none((clr), CPACR_EL1_ZEN); \ - __build_check_all_or_none((set), CPACR_EL1_ZEN); \ - __build_check_all_or_none((clr), CPACR_EL1_SMEN); \ - __build_check_all_or_none((set), CPACR_EL1_SMEN); \ - \ - if (has_vhe() || has_hvhe()) \ - sysreg_clear_set(cpacr_el1, clr, set); \ - else \ - sysreg_clear_set(cptr_el2, \ - __cpacr_to_cptr_clr(clr, set), \ - __cpacr_to_cptr_set(clr, set));\ - } while (0) - /* * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE * format if E2H isn't set. diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6ce2c5173482..3e41a880b062 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1107,14 +1107,36 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); + +#define __vcpu_assign_sys_reg(v, r, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) + +#define __vcpu_rmw_sys_reg(v, r, op, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ + __v op (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) + #define __vcpu_sys_reg(v,r) \ - (*({ \ + ({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ - u64 *__r = __ctxt_sys_reg(ctxt, (r)); \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ - *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\ - __r; \ - })) + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + __v; \ + }) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); @@ -1267,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm); }) /* - * The couple of isb() below are there to guarantee the same behaviour - * on VHE as on !VHE, where the eret to EL1 acts as a context - * synchronization event. + * The isb() below is there to guarantee the same behaviour on VHE as on !VHE, + * where the eret to EL1 acts as a context synchronization event. */ #define kvm_call_hyp(f, ...) \ do { \ @@ -1287,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm); \ if (has_vhe()) { \ ret = f(__VA_ARGS__); \ - isb(); \ } else { \ ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ @@ -1460,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 88db8a0c0b37..192d86e1cc76 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -563,7 +563,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & PTE_SWP_EXCLUSIVE; } diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index eba1a98657f1..aa9efee17277 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -323,13 +323,14 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) } /* - * If mprotect/munmap/etc occurs during TLB batched flushing, we need to - * synchronise all the TLBI issued with a DSB to avoid the race mentioned in - * flush_tlb_batched_pending(). + * If mprotect/munmap/etc occurs during TLB batched flushing, we need to ensure + * all the previously issued TLBIs targeting mm have completed. But since we + * can be executing on a remote CPU, a DSB cannot guarantee this like it can + * for arch_tlbbatch_flush(). Our only option is to flush the entire mm. */ static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm) { - dsb(ish); + flush_tlb_mm(mm); } /* diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 71c29a2a2f19..a2faf0049dab 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ syscall.o proton-pack.o idle.o patching.o pi/ \ - rsi.o + rsi.o jump_label.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o @@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o obj-$(CONFIG_PCI) += pci.o @@ -78,7 +77,7 @@ $(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so obj-y += probes/ obj-y += head.o -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 45ea79cacf46..e151585c6cca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1199,8 +1199,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) cpacr_restore(cpacr); } - if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) + if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { + info->reg_mpamidr = read_cpuid(MPAMIDR_EL1); init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr); + } if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); @@ -1453,7 +1455,8 @@ void update_cpu_features(int cpu, cpacr_restore(cpacr); } - if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) { + if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { + info->reg_mpamidr = read_cpuid(MPAMIDR_EL1); taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu, info->reg_mpamidr, boot->reg_mpamidr); } @@ -3132,6 +3135,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope) } #endif +#ifdef CONFIG_ARM64_SME +static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope) +{ + return system_supports_sme() && has_user_cpuid_feature(cap, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL), HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES), @@ -3220,31 +3230,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), - HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), - HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), - HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), #endif /* CONFIG_ARM64_SME */ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 94525abd1c22..c1f2b6b04b41 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -496,8 +496,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) __cpuinfo_store_cpu_32bit(&info->aarch32); - if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) - info->reg_mpamidr = read_cpuid(MPAMIDR_EL1); + /* + * info->reg_mpamidr deferred to {init,update}_cpu_features because we + * don't want to read it (and trigger a trap on buggy firmware) if + * using an aa64pfr0_el1 override to unconditionally disable MPAM. + */ if (IS_ENABLED(CONFIG_ARM64_SME) && id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3857fd7ee8d4..62230d6dd919 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -15,6 +15,7 @@ #include <asm/efi.h> #include <asm/stacktrace.h> +#include <asm/vmap_stack.h> static bool region_is_misaligned(const efi_memory_desc_t *md) { @@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, - NUMA_NO_NODE, &&l); -l: if (!p) { + if (!IS_ENABLED(CONFIG_VMAP_STACK)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); + if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return -ENOMEM; diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 5a69b6eb4090..714b0b5ec5ac 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -10,6 +10,10 @@ #error This file should only be included in vmlinux.lds.S #endif +#if defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 210000 +#define ASSERT(...) +#endif + #define PI_EXPORT_SYM(sym) \ __PI_EXPORT_SYM(sym, __pi_ ## sym, Cannot export BSS symbol sym to startup code) #define __PI_EXPORT_SYM(sym, pisym, msg)\ @@ -140,4 +144,17 @@ KVM_NVHE_ALIAS(kvm_protected_mode_initialized); _kernel_codesize = ABSOLUTE(__inittext_end - _text); #endif +/* + * LLD will occasionally error out with a '__init_end does not converge' error + * if INIT_IDMAP_DIR_SIZE is defined in terms of _end, as this results in a + * circular dependency. Counter this by dimensioning the initial IDMAP page + * tables based on kimage_limit, which is defined such that its value should + * not change as a result of the initdata segment being pushed over a 64k + * segment boundary due to changes in INIT_IDMAP_DIR_SIZE, provided that its + * value doesn't change by more than 2M between linker passes. + */ +kimage_limit = ALIGN(ABSOLUTE(_end + SZ_64K), SZ_2M); + +#undef ASSERT + #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c index c6b185b885f7..bc57b290e5e7 100644 --- a/arch/arm64/kernel/pi/idreg-override.c +++ b/arch/arm64/kernel/pi/idreg-override.c @@ -127,6 +127,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = { .fields = { FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter), FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL), + FIELD("mpam", ID_AA64PFR0_EL1_MPAM_SHIFT, NULL), {} }, }; @@ -154,6 +155,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = { FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL), FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL), FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter), + FIELD("mpam_frac", ID_AA64PFR1_EL1_MPAM_frac_SHIFT, NULL), {} }, }; @@ -246,6 +248,7 @@ static const struct { { "rodata=off", "arm64_sw.rodataoff=1" }, { "arm64.nolva", "id_aa64mmfr2.varange=0" }, { "arm64.no32bit_el0", "id_aa64pfr0.el0=1" }, + { "arm64.nompam", "id_aa64pfr0.mpam=0 id_aa64pfr1.mpam_frac=0" }, }; static int __init parse_hexdigit(const char *p, u64 *v) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a5ca15daeb8a..08b7042a2e2d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -288,7 +288,9 @@ static void flush_gcs(void) if (!system_supports_gcs()) return; - gcs_free(current); + current->thread.gcspr_el0 = 0; + current->thread.gcs_base = 0; + current->thread.gcs_size = 0; current->thread.gcs_el0_mode = 0; write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); write_sysreg_s(0, SYS_GCSPR_EL0); @@ -671,6 +673,11 @@ static void permission_overlay_switch(struct task_struct *next) current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); if (current->thread.por_el0 != next->thread.por_el0) { write_sysreg_s(next->thread.por_el0, SYS_POR_EL0); + /* + * No ISB required as we can tolerate spurious Overlay faults - + * the fault handler will check again based on the new value + * of POR_EL0. + */ } } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a360e52db02f..ee94b72bf8fb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -141,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; + return READ_ONCE_NOCHECK(*addr); else return 0; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b3f6b56e733..21a795303568 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1143,7 +1143,7 @@ static inline unsigned int num_other_online_cpus(void) void smp_send_stop(void) { static unsigned long stop_in_progress; - cpumask_t mask; + static cpumask_t mask; unsigned long timeout; /* diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index fdbc8beec930..701ea10a63f1 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -108,16 +108,16 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl); break; case TIMER_PTIMER: - __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl); break; case TIMER_HVTIMER: - __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl); break; case TIMER_HPTIMER: - __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl); break; default: WARN_ON(1); @@ -130,16 +130,16 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval; + __vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval); break; case TIMER_PTIMER: - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; + __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval); break; case TIMER_HVTIMER: - __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval; + __vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval); break; case TIMER_HPTIMER: - __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval; + __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval); break; default: WARN_ON(1); @@ -1036,7 +1036,7 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) if (vcpu_has_nv(vcpu)) { struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; - offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2); offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index de2b4e9c9f9f..23dd3f3fc3eb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (!kvm_arm_vcpu_is_finalized(vcpu)) return -EPERM; - ret = kvm_arch_vcpu_run_map_fp(vcpu); - if (ret) - return ret; - if (likely(vcpu_has_run_once(vcpu))) return 0; @@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard) static void cpu_hyp_uninit(void *discard) { - if (__this_cpu_read(kvm_hyp_initialized)) { + if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) { cpu_hyp_reset(); __this_cpu_write(kvm_hyp_initialized, 0); } @@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { + if (per_cpu(kvm_hyp_initialized, cpu)) + continue; + free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); - free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + + if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]) + continue; if (free_sve) { struct cpu_sve_state *sve_state; @@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void) sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; free_pages((unsigned long) sve_state, pkvm_host_sve_state_order()); } + + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + } } @@ -2764,7 +2768,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return memcmp(&old->msi, &new->msi, sizeof(new->msi)); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 0e4c805e7e89..1a7dab333f55 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -216,9 +216,9 @@ void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu) void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val) { if (val & OSLAR_EL1_OSLK) - __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK; + __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, |=, OSLSR_EL1_OSLK); else - __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK; + __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, &=, ~OSLSR_EL1_OSLK); preempt_disable(); kvm_arch_vcpu_put(vcpu); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 7f6e43d25691..15e17aca1dec 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -15,32 +15,6 @@ #include <asm/sysreg.h> /* - * Called on entry to KVM_RUN unless this vcpu previously ran at least - * once and the most recent prior KVM_RUN for this vcpu was called from - * the same task as current (highly likely). - * - * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu), - * such that on entering hyp the relevant parts of current are already - * mapped. - */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) -{ - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; - int ret; - - /* pKVM has its own tracking of the host fpsimd state. */ - if (is_protected_kvm_enabled()) - return 0; - - /* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); - if (ret) - return ret; - - return 0; -} - -/* * Prepare vcpu for saving the host's FPSIMD state and loading the guest's. * The actual loading is done by the FPSIMD access trap taken to hyp. * @@ -103,8 +77,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fp_state.sve_state = vcpu->arch.sve_state; fp_state.sve_vl = vcpu->arch.sve_max_vl; fp_state.sme_state = NULL; - fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR); - fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR); + fp_state.svcr = __ctxt_sys_reg(&vcpu->arch.ctxt, SVCR); + fp_state.fpmr = __ctxt_sys_reg(&vcpu->arch.ctxt, FPMR); fp_state.fp_type = &vcpu->arch.fp_type; if (vcpu_has_sve(vcpu)) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 424a5107cddb..6a2a899a344e 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -37,7 +37,7 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) if (unlikely(vcpu_has_nv(vcpu))) vcpu_write_sys_reg(vcpu, val, reg); else if (!__vcpu_write_sys_reg_to_cpu(val, reg)) - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, @@ -51,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, } else if (has_vhe()) { write_sysreg_el1(val, SYS_SPSR); } else { - __vcpu_sys_reg(vcpu, SPSR_EL1) = val; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val); } } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index bb9f2eecfb67..2ad57b117385 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -45,7 +45,7 @@ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) if (!vcpu_el1_is_32bit(vcpu)) return; - __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); + __vcpu_assign_sys_reg(vcpu, FPEXC32_EL2, read_sysreg(fpexc32_el2)); } static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) @@ -65,6 +65,136 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) } } +static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA; + + /* + * Always trap SME since it's not supported in KVM. + * TSM is RES1 if SME isn't implemented. + */ + val |= CPTR_EL2_TSM; + + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) + val |= CPTR_EL2_TZ; + + if (!guest_owns_fp_regs()) + val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); +} + +static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA; + u64 cptr; + + if (guest_owns_fp_regs()) { + val |= CPACR_EL1_FPEN; + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } + + if (!vcpu_has_nv(vcpu)) + goto write; + + /* + * The architecture is a bit crap (what a surprise): an EL2 guest + * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, + * as they are RES0 in the guest's view. To work around it, trap the + * sucker using the very same bit it can't set... + */ + if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) + val |= CPTR_EL2_TCPAC; + + /* + * Layer the guest hypervisor's trap configuration on top of our own if + * we're in a nested context. + */ + if (is_hyp_ctxt(vcpu)) + goto write; + + cptr = vcpu_sanitised_cptr_el2(vcpu); + + /* + * Pay attention, there's some interesting detail here. + * + * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two + * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): + * + * - CPTR_EL2.xEN = x0, traps are enabled + * - CPTR_EL2.xEN = x1, traps are disabled + * + * In other words, bit[0] determines if guest accesses trap or not. In + * the interest of simplicity, clear the entire field if the guest + * hypervisor has traps enabled to dispel any illusion of something more + * complicated taking place. + */ + if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_FPEN; + if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_ZEN; + + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) + val |= cptr & CPACR_EL1_E0POE; + + val |= cptr & CPTR_EL2_TCPAC; + +write: + write_sysreg(val, cpacr_el1); +} + +static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (!guest_owns_fp_regs()) + __activate_traps_fpsimd32(vcpu); + + if (has_vhe() || has_hvhe()) + __activate_cptr_traps_vhe(vcpu); + else + __activate_cptr_traps_nvhe(vcpu); +} + +static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1; + + if (!cpus_have_final_cap(ARM64_SVE)) + val |= CPTR_EL2_TZ; + if (!cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; + + write_sysreg(val, cptr_el2); +} + +static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPACR_EL1_FPEN; + + if (cpus_have_final_cap(ARM64_SVE)) + val |= CPACR_EL1_ZEN; + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN; + + write_sysreg(val, cpacr_el1); +} + +static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (has_vhe() || has_hvhe()) + __deactivate_cptr_traps_vhe(vcpu); + else + __deactivate_cptr_traps_nvhe(vcpu); +} + #define reg_to_fgt_masks(reg) \ ({ \ struct fgt_masks *m; \ @@ -456,7 +586,7 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) */ if (vcpu_has_sve(vcpu)) { zcr_el1 = read_sysreg_el1(SYS_ZCR); - __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1; + __vcpu_assign_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu), zcr_el1); /* * The guest's state is always saved using the guest's max VL. @@ -486,11 +616,6 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) */ if (system_supports_sve()) { __hyp_sve_save_host(); - - /* Re-enable SVE traps if not supported for the guest vcpu. */ - if (!vcpu_has_sve(vcpu)) - cpacr_clear_set(CPACR_EL1_ZEN, 0); - } else { __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs)); } @@ -541,10 +666,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ /* First disable enough traps to allow us to update the registers */ - if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve())) - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); - else - cpacr_clear_set(0, CPACR_EL1_FPEN); + __deactivate_cptr_traps(vcpu); isb(); /* Write out the host state if it's in the registers */ @@ -566,6 +688,13 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; + /* + * Re-enable traps necessary for the current state of the guest, e.g. + * those enabled by a guest hypervisor. The ERET to the guest will + * provide the necessary context synchronization. + */ + __activate_cptr_traps(vcpu); + return true; } diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index b9cff893bbe0..4d0dbea4c56f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -307,11 +307,11 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); - __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); - __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + __vcpu_assign_sys_reg(vcpu, DACR32_EL2, read_sysreg(dacr32_el2)); + __vcpu_assign_sys_reg(vcpu, IFSR32_EL2, read_sysreg(ifsr32_el2)); if (has_vhe() || kvm_debug_regs_in_use(vcpu)) - __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); + __vcpu_assign_sys_reg(vcpu, DBGVCR32_EL2, read_sysreg(dbgvcr32_el2)); } static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8e8848de4d47..3206b2c07f82 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -26,7 +26,7 @@ void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) { - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + __vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR)); /* * On saving/restoring guest sve state, always use the maximum VL for * the guest. The layout of the data when saving the sve state depends @@ -69,7 +69,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs()) return; - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); + /* + * Traps have been disabled by __deactivate_cptr_traps(), but there + * hasn't necessarily been a context synchronization event yet. + */ isb(); if (vcpu_has_sve(vcpu)) @@ -79,7 +82,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm)); if (has_fpmr) - __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR); + __vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR)); if (system_supports_sve()) __hyp_sve_restore_host(); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 95d7534c9679..8957734d6183 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; + u64 granule; s8 level; int ret; @@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return -EPERM; } - do { - u64 granule = kvm_granule_size(level); + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { + if (!kvm_level_supports_block_mapping(level)) + continue; + granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - level++; - } while ((level <= KVM_PGTABLE_LAST_LEVEL) && - !(kvm_level_supports_block_mapping(level) && - range_included(&cur, range))); + if (!range_included(&cur, range)) + continue; + *range = cur; + return 0; + } - *range = cur; + WARN_ON(1); - return 0; + return -EINVAL; } int host_stage2_idmap_locked(phys_addr_t addr, u64 size, diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 73affe1333a4..0e752b515d0f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -47,65 +47,6 @@ struct fgt_masks hdfgwtr2_masks; extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - - if (!guest_owns_fp_regs()) - __activate_traps_fpsimd32(vcpu); - - if (has_hvhe()) { - val |= CPACR_EL1_TTA; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } - - write_sysreg(val, cpacr_el1); - } else { - val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; - - /* - * Always trap SME since it's not supported in KVM. - * TSM is RES1 if SME isn't implemented. - */ - val |= CPTR_EL2_TSM; - - if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) - val |= CPTR_EL2_TZ; - - if (!guest_owns_fp_regs()) - val |= CPTR_EL2_TFP; - - write_sysreg(val, cptr_el2); - } -} - -static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) -{ - if (has_hvhe()) { - u64 val = CPACR_EL1_FPEN; - - if (cpus_have_final_cap(ARM64_SVE)) - val |= CPACR_EL1_ZEN; - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN; - - write_sysreg(val, cpacr_el1); - } else { - u64 val = CPTR_NVHE_EL2_RES1; - - if (!cpus_have_final_cap(ARM64_SVE)) - val |= CPTR_EL2_TZ; - if (!cpus_have_final_cap(ARM64_SME)) - val |= CPTR_EL2_TSM; - - write_sysreg(val, cptr_el2); - } -} - static void __activate_traps(struct kvm_vcpu *vcpu) { ___activate_traps(vcpu, vcpu->arch.hcr_el2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c9b330dc2066..477f1580ffea 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -90,87 +90,6 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) return hcr | (guest_hcr & ~NV_HCR_GUEST_EXCLUDE); } -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 cptr; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - __activate_traps_fpsimd32(vcpu); - } - - if (!vcpu_has_nv(vcpu)) - goto write; - - /* - * The architecture is a bit crap (what a surprise): an EL2 guest - * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, - * as they are RES0 in the guest's view. To work around it, trap the - * sucker using the very same bit it can't set... - */ - if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) - val |= CPTR_EL2_TCPAC; - - /* - * Layer the guest hypervisor's trap configuration on top of our own if - * we're in a nested context. - */ - if (is_hyp_ctxt(vcpu)) - goto write; - - cptr = vcpu_sanitised_cptr_el2(vcpu); - - /* - * Pay attention, there's some interesting detail here. - * - * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two - * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): - * - * - CPTR_EL2.xEN = x0, traps are enabled - * - CPTR_EL2.xEN = x1, traps are disabled - * - * In other words, bit[0] determines if guest accesses trap or not. In - * the interest of simplicity, clear the entire field if the guest - * hypervisor has traps enabled to dispel any illusion of something more - * complicated taking place. - */ - if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) - val &= ~CPACR_EL1_FPEN; - if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) - val &= ~CPACR_EL1_ZEN; - - if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) - val |= cptr & CPACR_EL1_E0POE; - - val |= cptr & CPTR_EL2_TCPAC; - -write: - write_sysreg(val, cpacr_el1); -} - -static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN; - - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN_EL1EN; - - write_sysreg(val, cpacr_el1); -} - static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -223,9 +142,9 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ val = read_sysreg_el0(SYS_CNTP_CVAL); if (map.direct_ptimer == vcpu_ptimer(vcpu)) - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, val); if (map.direct_ptimer == vcpu_hptimer(vcpu)) - __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, val); offset = read_sysreg_s(SYS_CNTPOFF_EL2); @@ -639,10 +558,10 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - sysreg_save_host_state_vhe(host_ctxt); - fpsimd_lazy_switch_to_guest(vcpu); + sysreg_save_host_state_vhe(host_ctxt); + /* * Note that ARM erratum 1165522 requires us to configure both stage 1 * and stage 2 translation for the guest context before we clear @@ -667,15 +586,23 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); - fpsimd_lazy_switch_to_host(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); + __debug_switch_to_host(vcpu); + + /* + * Ensure that all system register writes above have taken effect + * before returning to the host. In VHE mode, CPTR traps for + * FPSIMD/SVE/SME also apply to EL2, so FPSIMD/SVE/SME state must be + * manipulated after the ISB. + */ + isb(); + + fpsimd_lazy_switch_to_host(vcpu); + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); - __debug_switch_to_host(vcpu); - return exit_code; } NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); @@ -705,12 +632,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ local_daif_restore(DAIF_PROCCTX_NOIRQ); - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. We rely on the isb() in kvm_call_hyp*() - * to make sure these changes take effect before running the host or - * additional guests. - */ return ret; } diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 3814b0b2c937..73e4bc7fde9e 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -18,17 +18,17 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) { /* These registers are common with EL1 */ - __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1); - __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1); - - __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR); - __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); - __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1); - __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR); - __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); - __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); - __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); - __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); + __vcpu_assign_sys_reg(vcpu, PAR_EL1, read_sysreg(par_el1)); + __vcpu_assign_sys_reg(vcpu, TPIDR_EL1, read_sysreg(tpidr_el1)); + + __vcpu_assign_sys_reg(vcpu, ESR_EL2, read_sysreg_el1(SYS_ESR)); + __vcpu_assign_sys_reg(vcpu, AFSR0_EL2, read_sysreg_el1(SYS_AFSR0)); + __vcpu_assign_sys_reg(vcpu, AFSR1_EL2, read_sysreg_el1(SYS_AFSR1)); + __vcpu_assign_sys_reg(vcpu, FAR_EL2, read_sysreg_el1(SYS_FAR)); + __vcpu_assign_sys_reg(vcpu, MAIR_EL2, read_sysreg_el1(SYS_MAIR)); + __vcpu_assign_sys_reg(vcpu, VBAR_EL2, read_sysreg_el1(SYS_VBAR)); + __vcpu_assign_sys_reg(vcpu, CONTEXTIDR_EL2, read_sysreg_el1(SYS_CONTEXTIDR)); + __vcpu_assign_sys_reg(vcpu, AMAIR_EL2, read_sysreg_el1(SYS_AMAIR)); /* * In VHE mode those registers are compatible between EL1 and EL2, @@ -46,21 +46,21 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) * are always trapped, ensuring that the in-memory * copy is always up-to-date. A small blessing... */ - __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); - __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); - __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); - __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); + __vcpu_assign_sys_reg(vcpu, SCTLR_EL2, read_sysreg_el1(SYS_SCTLR)); + __vcpu_assign_sys_reg(vcpu, TTBR0_EL2, read_sysreg_el1(SYS_TTBR0)); + __vcpu_assign_sys_reg(vcpu, TTBR1_EL2, read_sysreg_el1(SYS_TTBR1)); + __vcpu_assign_sys_reg(vcpu, TCR_EL2, read_sysreg_el1(SYS_TCR)); if (ctxt_has_tcrx(&vcpu->arch.ctxt)) { - __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2); + __vcpu_assign_sys_reg(vcpu, TCR2_EL2, read_sysreg_el1(SYS_TCR2)); if (ctxt_has_s1pie(&vcpu->arch.ctxt)) { - __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0); - __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR); + __vcpu_assign_sys_reg(vcpu, PIRE0_EL2, read_sysreg_el1(SYS_PIRE0)); + __vcpu_assign_sys_reg(vcpu, PIR_EL2, read_sysreg_el1(SYS_PIR)); } if (ctxt_has_s1poe(&vcpu->arch.ctxt)) - __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR); + __vcpu_assign_sys_reg(vcpu, POR_EL2, read_sysreg_el1(SYS_POR)); } /* @@ -70,13 +70,13 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) */ val = read_sysreg_el1(SYS_CNTKCTL); val &= CNTKCTL_VALID_BITS; - __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS; - __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; + __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, &=, ~CNTKCTL_VALID_BITS); + __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, |=, val); } - __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1); - __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR); - __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR); + __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1)); + __vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR)); + __vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR)); } static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 4a53e4147fb0..dc1d26559bfa 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) } } +#define has_tgran_2(__r, __sz) \ + ({ \ + u64 _s1, _s2, _mmfr0 = __r; \ + \ + _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz##_2, _mmfr0); \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ + _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ + (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ + }) /* * Our emulated CPU doesn't support all the possible features. For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) */ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) { + u64 orig_val = val; + switch (reg) { case SYS_ID_AA64ISAR0_EL1: /* Support everything but TME */ @@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) */ switch (PAGE_SIZE) { case SZ_4K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); + if (has_tgran_2(orig_val, 4)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); fallthrough; case SZ_16K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); + if (has_tgran_2(orig_val, 16)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); fallthrough; case SZ_64K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); + if (has_tgran_2(orig_val, 64)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); break; } @@ -1757,7 +1777,7 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) out: for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++) - (void)__vcpu_sys_reg(vcpu, sr); + __vcpu_rmw_sys_reg(vcpu, sr, |=, 0); return 0; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 25c29107f13f..b03dbda7f1ab 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -178,7 +178,7 @@ static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) val |= lower_32_bits(val); } - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(pmc); @@ -204,7 +204,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); - __vcpu_sys_reg(vcpu, counter_index_to_reg(select_idx)) = val; + __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(select_idx), val); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); } @@ -239,7 +239,7 @@ static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) reg = counter_index_to_reg(pmc->idx); - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); kvm_pmu_release_perf_event(pmc); } @@ -503,14 +503,14 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; if (!kvm_pmc_is_64bit(pmc)) reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; + __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(i), reg); /* No overflow? move on */ if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) continue; /* Mark overflow */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(i)); if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(i + 1), @@ -556,7 +556,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, perf_event->attr.sample_period = period; perf_event->hw.sample_period = period; - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(idx)); if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(idx + 1), @@ -602,7 +602,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); /* The reset bits don't indicate any state, and shouldn't be saved. */ - __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); + __vcpu_assign_sys_reg(vcpu, PMCR_EL0, (val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P))); if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); @@ -779,7 +779,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 reg; reg = counter_index_to_evtreg(pmc->idx); - __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_assign_sys_reg(vcpu, reg, (data & kvm_pmu_evtyper_mask(vcpu->kvm))); kvm_pmu_create_perf_event(pmc); } @@ -914,9 +914,9 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask; - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask; - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask; + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, mask); + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, mask); + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, mask); kvm_pmu_reprogram_counter_mask(vcpu, mask); } @@ -1038,7 +1038,7 @@ static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr) u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2); val &= ~MDCR_EL2_HPMN; val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters); - __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); } } } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a6cf2888d150..c20bd6f21e60 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -228,7 +228,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) * to reverse-translate virtual EL2 system registers for a * non-VHE guest hypervisor. */ - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); switch (reg) { case CNTHCTL_EL2: @@ -263,7 +263,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) return; memory_write: - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ @@ -605,7 +605,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val ^ rd->val) & ~OSLSR_EL1_OSLK) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -791,7 +791,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) mask |= GENMASK(n - 1, 0); reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= mask; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, mask); return __vcpu_sys_reg(vcpu, r->reg); } @@ -799,7 +799,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, GENMASK(31, 0)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -811,7 +811,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return 0; reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, kvm_pmu_evtyper_mask(vcpu->kvm)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -819,7 +819,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, PMSELR_EL0_SEL_MASK); return __vcpu_sys_reg(vcpu, r->reg); } @@ -835,7 +835,7 @@ static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * The value of PMCR.N field is included when the * vCPU register is read via kvm_vcpu_read_pmcr(). */ - __vcpu_sys_reg(vcpu, r->reg) = pmcr; + __vcpu_assign_sys_reg(vcpu, r->reg, pmcr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -907,7 +907,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) - __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; + __vcpu_assign_sys_reg(vcpu, PMSELR_EL0, p->regval); else /* return PMSELR.SEL field */ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0) @@ -1076,7 +1076,7 @@ static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 va { u64 mask = kvm_pmu_accessible_counter_mask(vcpu); - __vcpu_sys_reg(vcpu, r->reg) = val & mask; + __vcpu_assign_sys_reg(vcpu, r->reg, val & mask); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; @@ -1103,10 +1103,10 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = p->regval & mask; if (r->Op2 & 0x1) /* accessing PMCNTENSET_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, |=, val); else /* accessing PMCNTENCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, ~val); kvm_pmu_reprogram_counter_mask(vcpu, val); } else { @@ -1129,10 +1129,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) /* accessing PMINTENSET_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, |=, val); else /* accessing PMINTENCLR_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, ~val); } else { p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } @@ -1151,10 +1151,10 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, (p->regval & mask)); else /* accessing PMOVSCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, ~(p->regval & mask)); } else { p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); } @@ -1185,8 +1185,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!vcpu_mode_priv(vcpu)) return undef_access(vcpu, p, r); - __vcpu_sys_reg(vcpu, PMUSERENR_EL0) = - p->regval & ARMV8_PMU_USERENR_MASK; + __vcpu_assign_sys_reg(vcpu, PMUSERENR_EL0, + (p->regval & ARMV8_PMU_USERENR_MASK)); } else { p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0) & ARMV8_PMU_USERENR_MASK; @@ -1237,7 +1237,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; @@ -2213,7 +2213,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (kvm_has_mte(vcpu->kvm)) clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc); - __vcpu_sys_reg(vcpu, r->reg) = clidr; + __vcpu_assign_sys_reg(vcpu, r->reg, clidr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -2227,7 +2227,7 @@ static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc)) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -2404,7 +2404,7 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SP_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SP_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SP_EL1); @@ -2428,7 +2428,7 @@ static bool access_spsr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1); @@ -2440,7 +2440,7 @@ static bool access_cntkctl_el12(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, CNTKCTL_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1); @@ -2454,7 +2454,9 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1)) val |= HCR_E2H; - return __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); + + return __vcpu_sys_reg(vcpu, r->reg); } static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu, @@ -2622,10 +2624,10 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, */ if (hpmn > vcpu->kvm->arch.nr_pmu_counters) { hpmn = vcpu->kvm->arch.nr_pmu_counters; - u64_replace_bits(val, hpmn, MDCR_EL2_HPMN); + u64p_replace_bits(&val, hpmn, MDCR_EL2_HPMN); } - __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); /* * Request a reload of the PMU to enable/disable the counters @@ -2754,7 +2756,7 @@ static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - __vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters; + __vcpu_assign_sys_reg(vcpu, r->reg, vcpu->kvm->arch.nr_pmu_counters); return vcpu->kvm->arch.nr_pmu_counters; } @@ -4790,7 +4792,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) r->reset(vcpu, r); if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS) - (void)__vcpu_sys_reg(vcpu, r->reg); + __vcpu_rmw_sys_reg(vcpu, r->reg, |=, 0); } set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); @@ -5012,7 +5014,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, if (r->set_user) { ret = (r->set_user)(vcpu, r, val); } else { - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); ret = 0; } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index cc6338d38766..ef97d9fc67cc 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -137,7 +137,7 @@ static inline u64 reset_unknown(struct kvm_vcpu *vcpu, { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; + __vcpu_assign_sys_reg(vcpu, r->reg, 0x1de7ec7edbadc0deULL); return __vcpu_sys_reg(vcpu, r->reg); } @@ -145,7 +145,7 @@ static inline u64 reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - __vcpu_sys_reg(vcpu, r->reg) = r->val; + __vcpu_assign_sys_reg(vcpu, r->reg, r->val); return __vcpu_sys_reg(vcpu, r->reg); } diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index 4f6954c30674..679aafe77de2 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -36,6 +36,11 @@ struct shadow_if { static DEFINE_PER_CPU(struct shadow_if, shadow_if); +static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx) +{ + return hweight16(shadow_if->lr_map & (BIT(idx) - 1)); +} + /* * Nesting GICv3 support * @@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) return reg; } +static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr) +{ + struct vgic_irq *irq; + + if (!(lr & ICH_LR_HW)) + return lr; + + /* We have the HW bit set, check for validity of pINTID */ + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + /* If there was no real mapping, nuke the HW bit */ + if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI) + lr &= ~ICH_LR_HW; + + /* Translate the virtual mapping to the real one, even if invalid */ + if (irq) { + lr &= ~ICH_LR_PHYS_ID_MASK; + lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + vgic_put_irq(vcpu->kvm, irq); + } + + return lr; +} + /* * For LRs which have HW bit set such as timer interrupts, we modify them to * have the host hardware interrupt number instead of the virtual one programmed @@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu, struct vgic_v3_cpu_if *s_cpu_if) { - unsigned long lr_map = 0; - int index = 0; + struct shadow_if *shadow_if; + + shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif); + shadow_if->lr_map = 0; for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) { u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); - struct vgic_irq *irq; if (!(lr & ICH_LR_STATE)) - lr = 0; - - if (!(lr & ICH_LR_HW)) - goto next; - - /* We have the HW bit set, check for validity of pINTID */ - irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); - if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) { - /* There was no real mapping, so nuke the HW bit */ - lr &= ~ICH_LR_HW; - if (irq) - vgic_put_irq(vcpu->kvm, irq); - goto next; - } - - /* Translate the virtual mapping to the real one */ - lr &= ~ICH_LR_PHYS_ID_MASK; - lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + continue; - vgic_put_irq(vcpu->kvm, irq); + lr = translate_lr_pintid(vcpu, lr); -next: - s_cpu_if->vgic_lr[index] = lr; - if (lr) { - lr_map |= BIT(i); - index++; - } + s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr; + shadow_if->lr_map |= BIT(i); } - container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map; - s_cpu_if->used_lrs = index; + s_cpu_if->used_lrs = hweight16(shadow_if->lr_map); } void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) { struct shadow_if *shadow_if = get_shadow_if(); - int i, index = 0; + int i; for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); struct vgic_irq *irq; if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) - goto next; + continue; /* * If we had a HW lr programmed by the guest hypervisor, we @@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) */ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ - goto next; + continue; - lr = __gic_v3_get_lr(index); + lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); if (!(lr & ICH_LR_STATE)) irq->active = false; vgic_put_irq(vcpu->kvm, irq); - next: - index++; } } @@ -356,25 +361,23 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu) val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); val &= ~ICH_HCR_EL2_EOIcount_MASK; val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); - __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val; - __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr; + __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val); + __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr); for (i = 0; i < 4; i++) { - __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i]; - __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i]; + __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]); + __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]); } for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); val &= ~ICH_LR_STATE; - val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE; + val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE; - __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val; - s_cpu_if->vgic_lr[i] = 0; + __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); } - shadow_if->lr_map = 0; vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; } @@ -398,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) { bool level; - level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En; - if (level) - level &= vgic_v3_get_misr(vcpu); + level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu); kvm_vgic_inject_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.mi_intid, level, vcpu); } diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c index 6a661cf04821..c9a74766785b 100644 --- a/arch/arm64/lib/crypto/poly1305-glue.c +++ b/arch/arm64/lib/crypto/poly1305-glue.c @@ -38,14 +38,14 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int todo = min_t(unsigned int, len, SZ_4K); kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, 1); + poly1305_blocks_neon(state, src, todo, padbit); kernel_neon_end(); len -= todo; src += todo; } while (len); } else - poly1305_blocks(state, src, len, 1); + poly1305_blocks(state, src, len, padbit); } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ec0a337891dd..11eb8d1adc84 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr, } } -static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, - unsigned int mm_flags) +static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags) { - unsigned long iss2 = ESR_ELx_ISS2(esr); - if (!system_supports_poe()) return false; - if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay)) - return true; - + /* + * We do not check whether an Overlay fault has occurred because we + * cannot make a decision based solely on its value: + * + * - If Overlay is set, a fault did occur due to POE, but it may be + * spurious in those cases where we update POR_EL0 without ISB (e.g. + * on context-switch). We would then need to manually check POR_EL0 + * against vma_pkey(vma), which is exactly what + * arch_vma_access_permitted() does. + * + * - If Overlay is not set, we may still need to report a pkey fault. + * This is the case if an access was made within a mapping but with no + * page mapped, and POR_EL0 forbids the access (according to + * vma_pkey()). Such access will result in a SIGSEGV regardless + * because core code checks arch_vma_access_permitted(), but in order + * to report the correct error code - SEGV_PKUERR - we must handle + * that case here. + */ return !arch_vma_access_permitted(vma, mm_flags & FAULT_FLAG_WRITE, mm_flags & FAULT_FLAG_INSTRUCTION, @@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); vma_end_read(vma); fault = 0; @@ -679,7 +691,7 @@ retry: goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); mmap_read_unlock(mm); fault = 0; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8fcf59ba39db..00ab1d648db6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1305,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) next = addr; end = addr + PUD_SIZE; do { - pmd_free_pte_page(pmdp, next); + if (pmd_present(pmdp_get(pmdp))) + pmd_free_pte_page(pmdp, next); } while (pmdp++, next += PMD_SIZE, next != end); pud_clear(pudp); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 80d470aa469d..54dccfd6aa11 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -518,7 +518,6 @@ alternative_else_nop_endif msr REG_PIR_EL1, x0 orr tcr2, tcr2, TCR2_EL1_PIE - msr REG_TCR2_EL1, x0 .Lskip_indirection: diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index b8378431aeff..5a394be09c35 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -200,7 +200,7 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile index de1c3472e8f0..a406a4ac2378 100644 --- a/arch/csky/kernel/Makefile +++ b/arch/csky/kernel/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y += head.o entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/ obj-y += power.o syscall.o syscall_table.o setup.o diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 9fbdfdbc539f..fbf24d1d1ca6 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -387,7 +387,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) (((type & 0x1f) << 1) | \ ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/hexagon/kernel/Makefile b/arch/hexagon/kernel/Makefile index 3fdf937eb572..8e0fb4a62315 100644 --- a/arch/hexagon/kernel/Makefile +++ b/arch/hexagon/kernel/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y += head.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1a2cf012b8f2..4b19f93379a1 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -69,6 +69,7 @@ config LOONGARCH select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP @@ -123,6 +124,7 @@ config LOONGARCH select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD @@ -187,6 +189,7 @@ config LOONGARCH select MODULES_USE_ELF_RELA if MODULES select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK + select NUMA_MEMBLKS if NUMA select OF select OF_EARLY_FLATTREE select PCI @@ -456,6 +459,15 @@ config SCHED_SMT Improves scheduler's performance when there are multiple threads in one physical core. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. + config SMP bool "Multi-Processing support" help @@ -485,10 +497,10 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. config NR_CPUS - int "Maximum number of CPUs (2-256)" - range 2 256 + int "Maximum number of CPUs (2-2048)" + range 2 2048 + default "2048" depends on SMP - default "64" help This allows you to specify the maximum number of CPUs which this kernel will support. diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 0304eabbe606..b0703a4e02a2 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -181,11 +181,14 @@ vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@ install: - $(Q)install -D -m 755 $(KBUILD_IMAGE) $(INSTALL_PATH)/$(image-name-y)-$(KERNELRELEASE) - $(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE) - $(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE) + $(call cmd,install) define archhelp - echo ' install - install kernel into $(INSTALL_PATH)' + echo ' vmlinux.elf - Uncompressed ELF kernel image (arch/loongarch/boot/vmlinux.elf)' + echo ' vmlinux.efi - Uncompressed EFI kernel image (arch/loongarch/boot/vmlinux.efi)' + echo ' vmlinuz.efi - GZIP/ZSTD-compressed EFI kernel image (arch/loongarch/boot/vmlinuz.efi)' + echo ' Default when CONFIG_EFI_ZBOOT=y' + echo ' install - Install kernel using (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or install.sh to $$(INSTALL_PATH)' echo endef diff --git a/arch/loongarch/boot/dts/loongson-2k0500.dtsi b/arch/loongarch/boot/dts/loongson-2k0500.dtsi index 3b38ff8853a7..760c60eebb89 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k0500.dtsi @@ -169,6 +169,166 @@ interrupts = <3>; }; + pwm@1ff5c000 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c000 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c010 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c010 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c020 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c020 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c030 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c030 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c040 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c040 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <25 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c050 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c050 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <25 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c060 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c060 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <25 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c070 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c070 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <25 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c080 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c080 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <26 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c090 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c090 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <26 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c0a0 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c0a0 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <26 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c0b0 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c0b0 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <26 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c0c0 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c0c0 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <27 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c0d0 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c0d0 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <27 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c0e0 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c0e0 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <27 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1ff5c0f0 { + compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1ff5c0f0 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <27 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + gmac0: ethernet@1f020000 { compatible = "snps,dwmac-3.70a"; reg = <0x0 0x1f020000 0x0 0x10000>; diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts index 3514ea78f525..78ea995abf1c 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts @@ -5,6 +5,7 @@ /dts-v1/; +#include "dt-bindings/thermal/thermal.h" #include "loongson-2k1000.dtsi" / { @@ -38,6 +39,13 @@ linux,cma-default; }; }; + + fan0: pwm-fan { + compatible = "pwm-fan"; + cooling-levels = <255 153 85 25>; + pwms = <&pwm1 0 100000 0>; + #cooling-cells = <2>; + }; }; &gmac0 { @@ -92,6 +100,22 @@ #size-cells = <0>; }; +&pwm1 { + status = "okay"; + + pinctrl-0 = <&pwm1_pins_default>; + pinctrl-names = "default"; +}; + +&cpu_thermal { + cooling-maps { + map0 { + trip = <&cpu_alert>; + cooling-device = <&fan0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; + }; +}; + &ehci0 { status = "okay"; }; diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi index 8dff2aa52417..1da3beb00f0e 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -68,7 +68,7 @@ }; thermal-zones { - cpu-thermal { + cpu_thermal: cpu-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; thermal-sensors = <&tsensor 0>; @@ -322,6 +322,46 @@ status = "disabled"; }; + pwm@1fe22000 { + compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1fe22000 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm1: pwm@1fe22010 { + compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1fe22010 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <25 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1fe22020 { + compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1fe22020 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <26 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@1fe22030 { + compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x1fe22030 0x0 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <27 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + pmc: power-management@1fe27000 { compatible = "loongson,ls2k1000-pmc", "loongson,ls2k0500-pmc", "syscon"; reg = <0x0 0x1fe27000 0x0 0x58>; diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi index b4ff55a33e90..9e0411f2754c 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi @@ -165,6 +165,66 @@ interrupt-parent = <&eiointc>; }; + pwm@100a0000 { + compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x100a0000 0x0 0x10>; + interrupt-parent = <&pic>; + interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_MISC_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@100a0100 { + compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x100a0100 0x0 0x10>; + interrupt-parent = <&pic>; + interrupts = <25 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_MISC_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@100a0200 { + compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x100a0200 0x0 0x10>; + interrupt-parent = <&pic>; + interrupts = <26 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_MISC_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@100a0300 { + compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x100a0300 0x0 0x10>; + interrupt-parent = <&pic>; + interrupts = <27 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_MISC_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@100a0400 { + compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x100a0400 0x0 0x10>; + interrupt-parent = <&pic>; + interrupts = <38 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_MISC_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + + pwm@100a0500 { + compatible = "loongson,ls2k2000-pwm", "loongson,ls7a-pwm"; + reg = <0x0 0x100a0500 0x0 0x10>; + interrupt-parent = <&pic>; + interrupts = <39 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_MISC_CLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + rtc0: rtc@100d0100 { compatible = "loongson,ls2k2000-rtc", "loongson,ls7a-rtc"; reg = <0x0 0x100d0100 0x0 0x100>; diff --git a/arch/loongarch/boot/install.sh b/arch/loongarch/boot/install.sh new file mode 100755 index 000000000000..daac197d3315 --- /dev/null +++ b/arch/loongarch/boot/install.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# Adapted from code in arch/i386/boot/install.sh by Russell King +# +# "make install" script for the LoongArch Linux port +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) + +set -e + +case "${2##*/}" in +vmlinux.elf) + echo "Installing uncompressed vmlinux.elf kernel" + base=vmlinux + ;; +vmlinux.efi) + echo "Installing uncompressed vmlinux.efi kernel" + base=vmlinux + ;; +vmlinuz.efi) + echo "Installing gzip/zstd compressed vmlinuz.efi kernel" + base=vmlinuz + ;; +*) + echo "Warning: Unexpected kernel type" + exit 1 + ;; +esac + +if [ -f $4/$base-$1 ]; then + mv $4/$base-$1 $4/$base-$1.old +fi +cat $2 > $4/$base-$1 + +# Install system map file +if [ -f $4/System.map-$1 ]; then + mv $4/System.map-$1 $4/System.map-$1.old +fi +cp $3 $4/System.map-$1 + +# Install kernel config file +if [ -f $4/config-$1 ]; then + mv $4/config-$1 $4/config-$1.old +fi +cp .config $4/config-$1 diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 313f66f7913a..7376840fa9f7 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -33,7 +33,7 @@ static inline bool acpi_has_cpu_in_madt(void) return true; } -#define MAX_CORE_PIC 256 +#define MAX_CORE_PIC 2048 extern struct list_head acpi_wakeup_device_list; extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC]; diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index fe198b473f84..e739dbc6329d 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -18,12 +18,12 @@ /* * This gives the physical RAM offset. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef PHYS_OFFSET #define PHYS_OFFSET _UL(0) #endif extern unsigned long vm_map_base; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifndef IO_BASE #define IO_BASE CSR_DMW0_BASE @@ -66,7 +66,7 @@ extern unsigned long vm_map_base; #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ATYPE_ #define _ATYPE32_ #define _ATYPE64_ @@ -85,7 +85,7 @@ extern unsigned long vm_map_base; /* * 32/64-bit LoongArch address spaces */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ACAST32_ #define _ACAST64_ #else diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h index ff3d10ac393f..7dc29bd9b2f0 100644 --- a/arch/loongarch/include/asm/alternative-asm.h +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -2,7 +2,7 @@ #ifndef _ASM_ALTERNATIVE_ASM_H #define _ASM_ALTERNATIVE_ASM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/asm.h> @@ -77,6 +77,6 @@ .previous .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h index cee7b29785ab..b5bae21fb3c8 100644 --- a/arch/loongarch/include/asm/alternative.h +++ b/arch/loongarch/include/asm/alternative.h @@ -2,7 +2,7 @@ #ifndef _ASM_ALTERNATIVE_H #define _ASM_ALTERNATIVE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/stddef.h> @@ -106,6 +106,6 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index df05005f2b80..d60bdf2e6377 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -7,7 +7,7 @@ #define EX_TYPE_UACCESS_ERR_ZERO 2 #define EX_TYPE_BPF 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ @@ -22,7 +22,7 @@ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #include <linux/bits.h> #include <linux/stringify.h> @@ -60,6 +60,6 @@ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h index f591b3245def..f018d26fc995 100644 --- a/arch/loongarch/include/asm/asm.h +++ b/arch/loongarch/include/asm/asm.h @@ -110,7 +110,7 @@ #define LONG_SRA srai.w #define LONG_SRAV sra.w -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .word #endif #define LONGSIZE 4 @@ -131,7 +131,7 @@ #define LONG_SRA srai.d #define LONG_SRAV sra.d -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .dword #endif #define LONGSIZE 8 @@ -158,7 +158,7 @@ #define PTR_SCALESHIFT 2 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define PTR .word #endif #define PTRSIZE 4 @@ -181,7 +181,7 @@ #define PTR_SCALESHIFT 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define PTR .dword #endif #define PTRSIZE 8 diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 98cf4d7b4b0a..dfb982fe8701 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -46,7 +46,7 @@ #define PRID_PRODUCT_MASK 0x0fff -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) enum cpu_type_enum { CPU_UNKNOWN, @@ -55,7 +55,7 @@ enum cpu_type_enum { CPU_LAST }; -#endif /* !__ASSEMBLY */ +#endif /* !__ASSEMBLER__ */ /* * ISA Level encodings diff --git a/arch/loongarch/include/asm/entry-common.h b/arch/loongarch/include/asm/entry-common.h index 0fe2a098ded9..099132980dc9 100644 --- a/arch/loongarch/include/asm/entry-common.h +++ b/arch/loongarch/include/asm/entry-common.h @@ -2,12 +2,6 @@ #ifndef ARCH_LOONGARCH_ENTRY_COMMON_H #define ARCH_LOONGARCH_ENTRY_COMMON_H -#include <linux/sched.h> -#include <linux/processor.h> - -static inline bool on_thread_stack(void) -{ - return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); -} +#include <asm/stacktrace.h> /* For on_thread_stack() */ #endif diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 6e0a99763a9a..f4caaf764f9e 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -14,7 +14,7 @@ #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef CONFIG_DYNAMIC_FTRACE @@ -84,7 +84,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h index 996038da806d..af95b941f48b 100644 --- a/arch/loongarch/include/asm/gpr-num.h +++ b/arch/loongarch/include/asm/gpr-num.h @@ -2,7 +2,7 @@ #ifndef __ASM_GPR_NUM_H #define __ASM_GPR_NUM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .equ .L__gpr_num_zero, 0 .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 @@ -25,7 +25,7 @@ .equ .L__gpr_num_$s\num, 23 + \num .endr -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __DEFINE_ASM_GPR_NUMS \ " .equ .L__gpr_num_zero, 0\n" \ @@ -47,6 +47,6 @@ " .equ .L__gpr_num_$s\\num, 23 + \\num\n" \ " .endr\n" \ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_GPR_NUM_H */ diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h index 319a8c616f1f..620163628a7f 100644 --- a/arch/loongarch/include/asm/irqflags.h +++ b/arch/loongarch/include/asm/irqflags.h @@ -5,7 +5,7 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/stringify.h> @@ -14,40 +14,48 @@ static inline void arch_local_irq_enable(void) { u32 flags = CSR_CRMD_IE; + register u32 mask asm("t0") = CSR_CRMD_IE; + __asm__ __volatile__( "csrxchg %[val], %[mask], %[reg]\n\t" : [val] "+r" (flags) - : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) : "memory"); } static inline void arch_local_irq_disable(void) { u32 flags = 0; + register u32 mask asm("t0") = CSR_CRMD_IE; + __asm__ __volatile__( "csrxchg %[val], %[mask], %[reg]\n\t" : [val] "+r" (flags) - : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) : "memory"); } static inline unsigned long arch_local_irq_save(void) { u32 flags = 0; + register u32 mask asm("t0") = CSR_CRMD_IE; + __asm__ __volatile__( "csrxchg %[val], %[mask], %[reg]\n\t" : [val] "+r" (flags) - : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) : "memory"); return flags; } static inline void arch_local_irq_restore(unsigned long flags) { + register u32 mask asm("t0") = CSR_CRMD_IE; + __asm__ __volatile__( "csrxchg %[val], %[mask], %[reg]\n\t" : [val] "+r" (flags) - : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) : "memory"); } @@ -72,6 +80,6 @@ static inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } -#endif /* #ifndef __ASSEMBLY__ */ +#endif /* #ifndef __ASSEMBLER__ */ #endif /* _ASM_IRQFLAGS_H */ diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h index 8a924bd69d19..4000c7603d8e 100644 --- a/arch/loongarch/include/asm/jump_label.h +++ b/arch/loongarch/include/asm/jump_label.h @@ -7,7 +7,7 @@ #ifndef __ASM_JUMP_LABEL_H #define __ASM_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -50,5 +50,5 @@ l_yes: return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index 7f52bd31b9d4..62f139a9c87d 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> #include <linux/mmzone.h> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 52651aa0e583..a0994d226eff 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -9,15 +9,15 @@ #include <linux/linkage.h> #include <linux/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <larchintrin.h> /* CPUCFG */ #define read_cpucfg(reg) __cpucfg(reg) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* LoongArch Registers */ #define REG_ZERO 0x0 @@ -53,7 +53,7 @@ #define REG_S7 0x1e #define REG_S8 0x1f -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Bit fields for CPUCFG registers */ #define LOONGARCH_CPUCFG0 0x0 @@ -171,7 +171,7 @@ * SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* CSR */ #define csr_read32(reg) __csrrd_w(reg) @@ -187,7 +187,7 @@ #define iocsr_write32(val, reg) __iocsrwr_w(val, reg) #define iocsr_write64(val, reg) __iocsrwr_d(val, reg) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* CSR register number */ @@ -411,8 +411,8 @@ /* Config CSR registers */ #define LOONGARCH_CSR_CPUID 0x20 /* CPU core id */ -#define CSR_CPUID_COREID_WIDTH 9 -#define CSR_CPUID_COREID _ULCAST_(0x1ff) +#define CSR_CPUID_COREID_WIDTH 11 +#define CSR_CPUID_COREID _ULCAST_(0x7ff) #define LOONGARCH_CSR_PRCFG1 0x21 /* Config1 */ #define CSR_CONF1_VSMAX_SHIFT 12 @@ -1195,7 +1195,7 @@ #define LOONGARCH_IOCSR_EXTIOI_ROUTE_BASE 0x1c00 #define IOCSR_EXTIOI_VECTOR_NUM 256 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static __always_inline u64 drdtime(void) { @@ -1357,7 +1357,7 @@ __BUILD_CSR_OP(tlbidx) #define clear_csr_estat(val) \ csr_xchg32(~(val), val, LOONGARCH_CSR_ESTAT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Generic EntryLo bit definitions */ #define ENTRYLO_V (_ULCAST_(1) << 0) diff --git a/arch/loongarch/include/asm/numa.h b/arch/loongarch/include/asm/numa.h index b5f9de9f102e..bbf9f70bd25f 100644 --- a/arch/loongarch/include/asm/numa.h +++ b/arch/loongarch/include/asm/numa.h @@ -22,20 +22,6 @@ extern int numa_off; extern s16 __cpuid_to_node[CONFIG_NR_CPUS]; extern nodemask_t numa_nodes_parsed __initdata; -struct numa_memblk { - u64 start; - u64 end; - int nid; -}; - -#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) -struct numa_meminfo { - int nr_blks; - struct numa_memblk blk[NR_NODE_MEMBLKS]; -}; - -extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); - extern void __init early_numa_add_cpu(int cpuid, s16 node); extern void numa_add_cpu(unsigned int cpu); extern void numa_remove_cpu(unsigned int cpu); diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h index caf1f71a1057..d5fa98d1d177 100644 --- a/arch/loongarch/include/asm/orc_types.h +++ b/arch/loongarch/include/asm/orc_types.h @@ -34,7 +34,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -53,6 +53,6 @@ struct orc_entry { unsigned int type:3; unsigned int signal:1; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 7368f12b7cb1..a3aaf34fba16 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -15,7 +15,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kernel.h> #include <linux/pfn.h> @@ -110,6 +110,6 @@ extern int __virt_addr_valid(volatile void *kaddr); #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PAGE_H */ diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 45bfc65a0c9f..7bbfb04a54cc 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -92,7 +92,7 @@ #define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ _PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC) @@ -127,6 +127,6 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PGTABLE_BITS_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index a3f17914dbab..f2aeff544cee 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -55,7 +55,7 @@ #define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm_types.h> #include <linux/mmzone.h> @@ -301,7 +301,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) #define __swp_entry_to_pmd(x) ((pmd_t) { (x).val | _PAGE_HUGE }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } @@ -618,6 +618,6 @@ static inline long pmd_protnone(pmd_t pmd) #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PGTABLE_H */ diff --git a/arch/loongarch/include/asm/prefetch.h b/arch/loongarch/include/asm/prefetch.h index 1672262a5e2e..0b168cdaae9a 100644 --- a/arch/loongarch/include/asm/prefetch.h +++ b/arch/loongarch/include/asm/prefetch.h @@ -8,7 +8,7 @@ #define Pref_Load 0 #define Pref_Store 8 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro __pref hint addr #ifdef CONFIG_CPU_HAS_PREFETCH diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index b87d1d5e5890..3a47f52959a8 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -25,6 +25,7 @@ extern int smp_num_siblings; extern int num_processors; extern int disabled_cpus; extern cpumask_t cpu_sibling_map[]; +extern cpumask_t cpu_llc_shared_map[]; extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_foreign_map[]; @@ -38,7 +39,7 @@ int loongson_cpu_disable(void); void loongson_cpu_die(unsigned int cpu); #endif -static inline void plat_smp_setup(void) +static inline void __init plat_smp_setup(void) { loongson_smp_setup(); } diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h index 8d4af6aff8a8..4501efac1a87 100644 --- a/arch/loongarch/include/asm/sparsemem.h +++ b/arch/loongarch/include/asm/sparsemem.h @@ -21,11 +21,6 @@ #define VMEMMAP_SIZE 0 /* 1, For FLATMEM; 2, For SPARSEMEM without VMEMMAP. */ #endif -#ifdef CONFIG_MEMORY_HOTPLUG -int memory_add_physaddr_to_nid(u64 addr); -#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid -#endif - #define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS) #endif /* _LOONGARCH_SPARSEMEM_H */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 66736837085b..3eda298702b1 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -57,6 +57,12 @@ jirl zero, \temp1, 0xc .endm + .macro STACKLEAK_ERASE +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + bl stackleak_erase_on_task_stack +#endif + .endm + .macro BACKUP_T0T1 csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h index fc8b64773794..5c8be156567c 100644 --- a/arch/loongarch/include/asm/stacktrace.h +++ b/arch/loongarch/include/asm/stacktrace.h @@ -31,6 +31,11 @@ bool in_irq_stack(unsigned long stack, struct stack_info *info); bool in_task_stack(unsigned long stack, struct task_struct *task, struct stack_info *info); int get_stack_info(unsigned long stack, struct task_struct *task, struct stack_info *info); +static __always_inline bool on_thread_stack(void) +{ + return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); +} + #define STR_LONG_L __stringify(LONG_L) #define STR_LONG_S __stringify(LONG_S) #define STR_LONGSIZE __stringify(LONGSIZE) diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 4f5a9441754e..9dfa2ef00816 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -10,7 +10,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> @@ -53,7 +53,7 @@ static inline struct thread_info *current_thread_info(void) register unsigned long current_stack_pointer __asm__("$sp"); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* thread information allocation */ #define THREAD_SIZE SZ_16K diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h index 50273c9187d0..f06e7ff25bb7 100644 --- a/arch/loongarch/include/asm/topology.h +++ b/arch/loongarch/include/asm/topology.h @@ -19,17 +19,22 @@ extern int pcibus_to_node(struct pci_bus *); #define cpumask_of_pcibus(bus) (cpu_online_mask) -extern unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES]; - -void numa_set_distance(int from, int to, int distance); - -#define node_distance(from, to) (node_distances[(from)][(to)]) +int __node_distance(int from, int to); +#define node_distance(from, to) __node_distance(from, to) #else #define pcibus_to_node(bus) 0 #endif #ifdef CONFIG_SMP +/* + * Return cpus that shares the last level cache. + */ +static inline const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_llc_shared_map[cpu]; +} + #define topology_physical_package_id(cpu) (cpu_data[cpu].package) #define topology_core_id(cpu) (cpu_data[cpu].core) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) diff --git a/arch/loongarch/include/asm/types.h b/arch/loongarch/include/asm/types.h index baf15a0dcf8b..0edd731f3d6a 100644 --- a/arch/loongarch/include/asm/types.h +++ b/arch/loongarch/include/asm/types.h @@ -8,7 +8,7 @@ #include <asm-generic/int-ll64.h> #include <uapi/asm/types.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ULCAST_ #define _U64CAST_ #else diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h index 2c68bc72736c..16c7f7e465a0 100644 --- a/arch/loongarch/include/asm/unwind_hints.h +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -5,7 +5,7 @@ #include <linux/objtool.h> #include <asm/orc_types.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro UNWIND_HINT_UNDEFINED UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED @@ -23,7 +23,7 @@ UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL .endm -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #define UNWIND_HINT_SAVE \ UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) @@ -31,6 +31,6 @@ #define UNWIND_HINT_RESTORE \ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */ diff --git a/arch/loongarch/include/asm/vdso/arch_data.h b/arch/loongarch/include/asm/vdso/arch_data.h index 322d0a5f1c84..395ec223bcbe 100644 --- a/arch/loongarch/include/asm/vdso/arch_data.h +++ b/arch/loongarch/include/asm/vdso/arch_data.h @@ -7,7 +7,7 @@ #ifndef _VDSO_ARCH_DATA_H #define _VDSO_ARCH_DATA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/vdso.h> @@ -20,6 +20,6 @@ struct vdso_arch_data { struct vdso_pcpu_data pdata[NR_CPUS]; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/loongarch/include/asm/vdso/getrandom.h b/arch/loongarch/include/asm/vdso/getrandom.h index 48c43f55b039..2ff05003c6e7 100644 --- a/arch/loongarch/include/asm/vdso/getrandom.h +++ b/arch/loongarch/include/asm/vdso/getrandom.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vdso.h> @@ -20,7 +20,7 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns asm volatile( " syscall 0\n" - : "+r" (ret) + : "=r" (ret) : "r" (nr), "r" (buffer), "r" (len), "r" (flags) : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"); @@ -28,6 +28,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns return ret; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index 88cfcf133116..dcafabca9bb6 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -7,7 +7,7 @@ #ifndef __ASM_VDSO_GETTIMEOFDAY_H #define __ASM_VDSO_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vdso.h> @@ -25,7 +25,7 @@ static __always_inline long gettimeofday_fallback( asm volatile( " syscall 0\n" - : "+r" (ret) + : "=r" (ret) : "r" (nr), "r" (tv), "r" (tz) : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"); @@ -44,7 +44,7 @@ static __always_inline long clock_gettime_fallback( asm volatile( " syscall 0\n" - : "+r" (ret) + : "=r" (ret) : "r" (nr), "r" (clkid), "r" (ts) : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"); @@ -63,7 +63,7 @@ static __always_inline int clock_getres_fallback( asm volatile( " syscall 0\n" - : "+r" (ret) + : "=r" (ret) : "r" (nr), "r" (clkid), "r" (ts) : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", "memory"); @@ -89,6 +89,6 @@ static inline bool loongarch_vdso_hres_capable(void) } #define __arch_vdso_hres_capable loongarch_vdso_hres_capable -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h index ef5770b343a0..1e255373b0b8 100644 --- a/arch/loongarch/include/asm/vdso/processor.h +++ b/arch/loongarch/include/asm/vdso/processor.h @@ -5,10 +5,10 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define cpu_relax() barrier() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h index 50c65fb29daf..04bd2d452876 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -7,7 +7,7 @@ #ifndef _ASM_VDSO_VDSO_H #define _ASM_VDSO_VDSO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/page.h> @@ -16,6 +16,6 @@ #define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h index 1140b54b4bc8..558eb9dfda52 100644 --- a/arch/loongarch/include/asm/vdso/vsyscall.h +++ b/arch/loongarch/include/asm/vdso/vsyscall.h @@ -2,13 +2,13 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <vdso/datapage.h> /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index f9dcaa60033d..6f5a4574a911 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -5,7 +5,7 @@ OBJECT_FILES_NON_STANDARD_head.o := y -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \ traps.o irq.o idle.o process.o dma.o mem.o reset.o switch.o \ diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 1120ac2824f6..1367ca759468 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/acpi.h> #include <linux/efi-bgrt.h> +#include <linux/export.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/memblock.h> @@ -244,22 +245,6 @@ fdt_earlycon: #ifdef CONFIG_ACPI_NUMA -static __init int setup_node(int pxm) -{ - return acpi_map_pxm_to_node(pxm); -} - -void __init numa_set_distance(int from, int to, int distance) -{ - if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { - pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", - from, to, distance); - return; - } - - node_distances[from][to] = distance; -} - /* Callback for Proximity Domain -> CPUID mapping */ void __init acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) @@ -280,7 +265,41 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) pxm |= (pa->proximity_domain_hi[1] << 16); pxm |= (pa->proximity_domain_hi[2] << 24); } - node = setup_node(pxm); + node = acpi_map_pxm_to_node(pxm); + if (node < 0) { + pr_err("SRAT: Too many proximity domains %x\n", pxm); + bad_srat(); + return; + } + + if (pa->apic_id >= CONFIG_NR_CPUS) { + pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u skipped apicid that is too big\n", + pxm, pa->apic_id, node); + return; + } + + early_numa_add_cpu(pa->apic_id, node); + + set_cpuid_to_node(pa->apic_id, node); + node_set(node, numa_nodes_parsed); + pr_info("SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", pxm, pa->apic_id, node); +} + +void __init +acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) { + bad_srat(); + return; + } + if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) + return; + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); if (node < 0) { pr_err("SRAT: Too many proximity domains %x\n", pxm); bad_srat(); diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c index 4ad13847e962..0e0c766df1e3 100644 --- a/arch/loongarch/kernel/alternative.c +++ b/arch/loongarch/kernel/alternative.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/export.h> #include <linux/mm.h> #include <linux/module.h> #include <asm/alternative.h> diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index de21e72759ee..860a3bc030e0 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -144,6 +144,18 @@ void __init efi_init(void) if (efi_memmap_init_early(&data) < 0) panic("Unable to map EFI memory map.\n"); + /* + * Reserve the physical memory region occupied by the EFI + * memory map table (header + descriptors). This is crucial + * for kdump, as the kdump kernel relies on this original + * memmap passed by the bootloader. Without reservation, + * this region could be overwritten by the primary kernel. + * Also, set the EFI_PRESERVE_BS_REGIONS flag to indicate that + * critical boot services code/data regions like this are preserved. + */ + memblock_reserve((phys_addr_t)boot_memmap, sizeof(*tbl) + data.size); + set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags); + early_memunmap(tbl, sizeof(*tbl)); } diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c index 0fa81ced28dc..3d98c6aa00db 100644 --- a/arch/loongarch/kernel/elf.c +++ b/arch/loongarch/kernel/elf.c @@ -6,7 +6,6 @@ #include <linux/binfmts.h> #include <linux/elf.h> -#include <linux/export.h> #include <linux/sched.h> #include <asm/cpu-features.h> diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 2abc29e57381..47e1db9a1ce4 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -73,6 +73,7 @@ SYM_CODE_START(handle_syscall) move a0, sp bl do_syscall + STACKLEAK_ERASE RESTORE_ALL_AND_RET SYM_CODE_END(handle_syscall) _ASM_NOKPROBE(handle_syscall) @@ -81,6 +82,7 @@ SYM_CODE_START(ret_from_fork_asm) UNWIND_HINT_REGS move a1, sp bl ret_from_fork + STACKLEAK_ERASE RESTORE_STATIC RESTORE_SOME RESTORE_SP_AND_RET @@ -92,6 +94,7 @@ SYM_CODE_START(ret_from_kernel_thread_asm) move a2, s0 move a3, s1 bl ret_from_kernel_thread + STACKLEAK_ERASE RESTORE_STATIC RESTORE_SOME RESTORE_SP_AND_RET diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index 4c476904227f..141b49bd989c 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -4,6 +4,7 @@ */ #include <linux/cpu.h> +#include <linux/export.h> #include <linux/init.h> #include <asm/fpu.h> #include <asm/smp.h> diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 30a72fd528c0..d6e73e8f9c0b 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -11,6 +11,7 @@ #include <linux/mmzone.h> #include <linux/export.h> #include <linux/nodemask.h> +#include <linux/numa_memblks.h> #include <linux/swap.h> #include <linux/memblock.h> #include <linux/pfn.h> @@ -27,10 +28,6 @@ #include <asm/time.h> int numa_off; -unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES]; -EXPORT_SYMBOL(node_distances); - -static struct numa_meminfo numa_meminfo; cpumask_t cpus_on_node[MAX_NUMNODES]; cpumask_t phys_cpus_on_node[MAX_NUMNODES]; EXPORT_SYMBOL(cpus_on_node); @@ -43,8 +40,6 @@ s16 __cpuid_to_node[CONFIG_NR_CPUS] = { }; EXPORT_SYMBOL(__cpuid_to_node); -nodemask_t numa_nodes_parsed __initdata; - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); @@ -145,48 +140,6 @@ void numa_remove_cpu(unsigned int cpu) cpumask_clear_cpu(cpu, &cpus_on_node[nid]); } -static int __init numa_add_memblk_to(int nid, u64 start, u64 end, - struct numa_meminfo *mi) -{ - /* ignore zero length blks */ - if (start == end) - return 0; - - /* whine about and ignore invalid blks */ - if (start > end || nid < 0 || nid >= MAX_NUMNODES) { - pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", - nid, start, end - 1); - return 0; - } - - if (mi->nr_blks >= NR_NODE_MEMBLKS) { - pr_err("NUMA: too many memblk ranges\n"); - return -EINVAL; - } - - mi->blk[mi->nr_blks].start = PFN_ALIGN(start); - mi->blk[mi->nr_blks].end = PFN_ALIGN(end - PAGE_SIZE + 1); - mi->blk[mi->nr_blks].nid = nid; - mi->nr_blks++; - return 0; -} - -/** - * numa_add_memblk - Add one numa_memblk to numa_meminfo - * @nid: NUMA node ID of the new memblk - * @start: Start address of the new memblk - * @end: End address of the new memblk - * - * Add a new memblk to the default numa_meminfo. - * - * RETURNS: - * 0 on success, -errno on failure. - */ -int __init numa_add_memblk(int nid, u64 start, u64 end) -{ - return numa_add_memblk_to(nid, start, end, &numa_meminfo); -} - static void __init node_mem_init(unsigned int node) { unsigned long start_pfn, end_pfn; @@ -205,18 +158,6 @@ static void __init node_mem_init(unsigned int node) #ifdef CONFIG_ACPI_NUMA -static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type) -{ - static unsigned long num_physpages; - - num_physpages += (size >> PAGE_SHIFT); - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", - node, type, start, size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start >> PAGE_SHIFT, (start + size) >> PAGE_SHIFT, num_physpages); - memblock_set_node(start, size, &memblock.memory, node); -} - /* * add_numamem_region * @@ -228,28 +169,21 @@ static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type */ static void __init add_numamem_region(u64 start, u64 end, u32 type) { - u32 i; - u64 ofs = start; + u32 node = pa_to_nid(start); + u64 size = end - start; + static unsigned long num_physpages; if (start >= end) { pr_debug("Invalid region: %016llx-%016llx\n", start, end); return; } - for (i = 0; i < numa_meminfo.nr_blks; i++) { - struct numa_memblk *mb = &numa_meminfo.blk[i]; - - if (ofs > mb->end) - continue; - - if (end > mb->end) { - add_node_intersection(mb->nid, ofs, mb->end - ofs, type); - ofs = mb->end; - } else { - add_node_intersection(mb->nid, ofs, end - ofs, type); - break; - } - } + num_physpages += (size >> PAGE_SHIFT); + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + node, type, start, size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + start >> PAGE_SHIFT, end >> PAGE_SHIFT, num_physpages); + memblock_set_node(start, size, &memblock.memory, node); } static void __init init_node_memblock(void) @@ -291,24 +225,6 @@ static void __init init_node_memblock(void) } } -static void __init numa_default_distance(void) -{ - int row, col; - - for (row = 0; row < MAX_NUMNODES; row++) - for (col = 0; col < MAX_NUMNODES; col++) { - if (col == row) - node_distances[row][col] = LOCAL_DISTANCE; - else - /* We assume that one node per package here! - * - * A SLIT should be used for multiple nodes - * per package to override default setting. - */ - node_distances[row][col] = REMOTE_DISTANCE; - } -} - /* * fake_numa_init() - For Non-ACPI systems * Return: 0 on success, -errno on failure. @@ -333,11 +249,11 @@ int __init init_numa_memory(void) for (i = 0; i < NR_CPUS; i++) set_cpuid_to_node(i, NUMA_NO_NODE); - numa_default_distance(); + numa_reset_distance(); nodes_clear(numa_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); - memset(&numa_meminfo, 0, sizeof(numa_meminfo)); + WARN_ON(memblock_clear_hotplug(0, PHYS_ADDR_MAX)); /* Parse SRAT and SLIT if provided by firmware. */ ret = acpi_disabled ? fake_numa_init() : acpi_numa_init(); diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index e5a39bbad078..b1b51f920b23 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/export.h> #include <linux/types.h> #include <linux/interrupt.h> #include <linux/irq_work.h> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 4b24589c0b56..46036d98da75 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -46,6 +46,10 @@ EXPORT_SYMBOL(__cpu_logical_map); cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); +/* Representing the last level cache shared map of each logical CPU */ +cpumask_t cpu_llc_shared_map[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(cpu_llc_shared_map); + /* Representing the core map of multi-core chips of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -63,6 +67,9 @@ EXPORT_SYMBOL(cpu_foreign_map); /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; +/* representing cpus for which llc shared maps can be computed */ +static cpumask_t cpu_llc_shared_setup_map; + /* representing cpus for which core maps can be computed */ static cpumask_t cpu_core_setup_map; @@ -102,6 +109,34 @@ static inline void set_cpu_core_map(int cpu) } } +static inline void set_cpu_llc_shared_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_llc_shared_setup_map); + + for_each_cpu(i, &cpu_llc_shared_setup_map) { + if (cpu_to_node(cpu) == cpu_to_node(i)) { + cpumask_set_cpu(i, &cpu_llc_shared_map[cpu]); + cpumask_set_cpu(cpu, &cpu_llc_shared_map[i]); + } + } +} + +static inline void clear_cpu_llc_shared_map(int cpu) +{ + int i; + + for_each_cpu(i, &cpu_llc_shared_setup_map) { + if (cpu_to_node(cpu) == cpu_to_node(i)) { + cpumask_clear_cpu(i, &cpu_llc_shared_map[cpu]); + cpumask_clear_cpu(cpu, &cpu_llc_shared_map[i]); + } + } + + cpumask_clear_cpu(cpu, &cpu_llc_shared_setup_map); +} + static inline void set_cpu_sibling_map(int cpu) { int i; @@ -406,6 +441,7 @@ int loongson_cpu_disable(void) #endif set_cpu_online(cpu, false); clear_cpu_sibling_map(cpu); + clear_cpu_llc_shared_map(cpu); calculate_cpu_foreign_map(); local_irq_save(flags); irq_migrate_all_off_this_cpu(); @@ -572,6 +608,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; loongson_prepare_cpus(max_cpus); set_cpu_sibling_map(0); + set_cpu_llc_shared_map(0); set_cpu_core_map(0); calculate_cpu_foreign_map(); #ifndef CONFIG_HOTPLUG_CPU @@ -613,6 +650,7 @@ asmlinkage void start_secondary(void) loongson_init_secondary(); set_cpu_sibling_map(cpu); + set_cpu_llc_shared_map(cpu); set_cpu_core_map(cpu); notify_cpu_starting(cpu); diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index bc75a3a69fc8..367906b10f81 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -102,7 +102,7 @@ static int constant_timer_next_event(unsigned long delta, struct clock_event_dev return 0; } -static unsigned long __init get_loops_per_jiffy(void) +static unsigned long get_loops_per_jiffy(void) { unsigned long lpj = (unsigned long)const_clock_freq; diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 47fc2de6d150..3d9be6ca7ec5 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/kexec.h> #include <linux/module.h> +#include <linux/export.h> #include <linux/extable.h> #include <linux/mm.h> #include <linux/sched/mm.h> diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c index 98379b7d4147..08d7951b2f60 100644 --- a/arch/loongarch/kernel/unwind_guess.c +++ b/arch/loongarch/kernel/unwind_guess.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include <asm/unwind.h> +#include <linux/export.h> unsigned long unwind_get_return_address(struct unwind_state *state) { diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index d623935a7547..0005be49b056 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/objtool.h> +#include <linux/export.h> #include <linux/module.h> +#include <linux/objtool.h> #include <linux/sort.h> #include <asm/exception.h> #include <asm/orc_header.h> diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 929ae240280a..729e775bd40d 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include <linux/cpumask.h> +#include <linux/export.h> #include <linux/ftrace.h> #include <linux/kallsyms.h> diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index 10cf1608c7b3..7b888d9085a0 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -105,7 +105,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vdso_addr = data_addr + VVAR_SIZE; vma = _install_special_mapping(mm, vdso_addr, info->size, - VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + VM_READ | VM_EXEC | + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | + VM_SEALED_SYSMAP, &info->code_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index f39929d7bf8a..a75f865d6fb9 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -9,7 +9,8 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) { - int ipnum, cpu, irq_index, irq_mask, irq; + int ipnum, cpu, cpuid, irq_index, irq_mask, irq; + struct kvm_vcpu *vcpu; for (irq = 0; irq < EIOINTC_IRQS; irq++) { ipnum = s->ipmap.reg_u8[irq / 32]; @@ -20,7 +21,12 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) irq_index = irq / 32; irq_mask = BIT(irq & 0x1f); - cpu = s->coremap.reg_u8[irq]; + cpuid = s->coremap.reg_u8[irq]; + vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); + if (!vcpu) + continue; + + cpu = vcpu->vcpu_id; if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask)) set_bit(irq, s->sw_coreisr[cpu][ipnum]); else @@ -66,20 +72,25 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level) } static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s, - int irq, void *pvalue, u32 len, bool notify) + int irq, u64 val, u32 len, bool notify) { - int i, cpu; - u64 val = *(u64 *)pvalue; + int i, cpu, cpuid; + struct kvm_vcpu *vcpu; for (i = 0; i < len; i++) { - cpu = val & 0xff; + cpuid = val & 0xff; val = val >> 8; if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) { - cpu = ffs(cpu) - 1; - cpu = (cpu >= 4) ? 0 : cpu; + cpuid = ffs(cpuid) - 1; + cpuid = (cpuid >= 4) ? 0 : cpuid; } + vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); + if (!vcpu) + continue; + + cpu = vcpu->vcpu_id; if (s->sw_coremap[irq + i] == cpu) continue; @@ -305,6 +316,11 @@ static int kvm_eiointc_read(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + vcpu->kvm->stat.eiointc_read_exits++; spin_lock_irqsave(&eiointc->lock, flags); switch (len) { @@ -398,7 +414,7 @@ static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq; s->coremap.reg_u8[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -436,17 +452,16 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, break; case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: index = (offset - EIOINTC_ENABLE_START) >> 1; - old_data = s->enable.reg_u32[index]; + old_data = s->enable.reg_u16[index]; s->enable.reg_u16[index] = data; /* * 1: enable irq. * update irq when isr is set. */ data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index]; - index = index << 1; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 1); } /* * 0: disable irq. @@ -455,7 +470,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -484,7 +499,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 1; s->coremap.reg_u16[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -529,10 +544,9 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, * update irq when isr is set. */ data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index]; - index = index << 2; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 1); } /* * 0: disable irq. @@ -541,7 +555,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -570,7 +584,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 2; s->coremap.reg_u32[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -615,10 +629,9 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, * update irq when isr is set. */ data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index]; - index = index << 3; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 1); } /* * 0: disable irq. @@ -627,7 +640,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -656,7 +669,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 3; s->coremap.reg_u64[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -679,6 +692,11 @@ static int kvm_eiointc_write(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + vcpu->kvm->stat.eiointc_write_exits++; spin_lock_irqsave(&eiointc->lock, flags); switch (len) { @@ -787,7 +805,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, int ret = 0; unsigned long flags; unsigned long type = (unsigned long)attr->attr; - u32 i, start_irq; + u32 i, start_irq, val; void __user *data; struct loongarch_eiointc *s = dev->kvm->arch.eiointc; @@ -795,8 +813,14 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: - if (copy_from_user(&s->num_cpu, data, 4)) + if (copy_from_user(&val, data, 4)) ret = -EFAULT; + else { + if (val >= EIOINTC_ROUTE_MAX_VCPUS) + ret = -EINVAL; + else + s->num_cpu = val; + } break; case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE: if (copy_from_user(&s->features, data, 4)) @@ -809,7 +833,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, for (i = 0; i < (EIOINTC_IRQS / 4); i++) { start_irq = i * 4; eiointc_update_sw_coremap(s, start_irq, - (void *)&s->coremap.reg_u32[i], sizeof(u32), false); + s->coremap.reg_u32[i], sizeof(u32), false); } break; default: @@ -824,7 +848,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, bool is_write) { - int addr, cpuid, offset, ret = 0; + int addr, cpu, offset, ret = 0; unsigned long flags; void *p = NULL; void __user *data; @@ -832,7 +856,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, s = dev->kvm->arch.eiointc; addr = attr->attr; - cpuid = addr >> 16; + cpu = addr >> 16; addr &= 0xffff; data = (void __user *)attr->addr; switch (addr) { @@ -857,8 +881,11 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, p = &s->isr.reg_u32[offset]; break; case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + if (cpu >= s->num_cpu) + return -EINVAL; + offset = (addr - EIOINTC_COREISR_START) / 4; - p = &s->coreisr.reg_u32[cpuid][offset]; + p = &s->coreisr.reg_u32[cpu][offset]; break; case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: offset = (addr - EIOINTC_COREMAP_START) / 4; @@ -899,9 +926,15 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev, data = (void __user *)attr->addr; switch (addr) { case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU: + if (is_write) + return ret; + p = &s->num_cpu; break; case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE: + if (is_write) + return ret; + p = &s->features; break; case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE: diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c index b37cd8537b45..db22c2ec55e2 100644 --- a/arch/loongarch/lib/crc32-loongarch.c +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -11,6 +11,7 @@ #include <asm/cpu-features.h> #include <linux/crc32.h> +#include <linux/export.h> #include <linux/module.h> #include <linux/unaligned.h> diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c index df309ae4045d..bcc9d01d8c41 100644 --- a/arch/loongarch/lib/csum.c +++ b/arch/loongarch/lib/csum.c @@ -2,6 +2,7 @@ // Copyright (C) 2019-2020 Arm Ltd. #include <linux/compiler.h> +#include <linux/export.h> #include <linux/kasan-checks.h> #include <linux/kernel.h> diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index cea84d7f2b91..02dad4624fe3 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -47,7 +47,8 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd = pmd_offset(pud, addr); } } - return pmd_none(pmdp_get(pmd)) ? NULL : (pte_t *) pmd; + + return (!pmd || pmd_none(pmdp_get(pmd))) ? NULL : (pte_t *) pmd; } uint64_t pmd_to_entrylo(unsigned long pmd_val) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 06f11d9e4ec1..c3e4586a7975 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -106,14 +106,6 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) page += vmem_altmap_offset(altmap); __remove_pages(start_pfn, nr_pages, altmap); } - -#ifdef CONFIG_NUMA -int memory_add_physaddr_to_nid(u64 start) -{ - return pa_to_nid(start); -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index 70ca73019811..df949a3d0f34 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -16,12 +16,12 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } -void *early_memremap_ro(resource_size_t phys_addr, unsigned long size) +void * __init early_memremap_ro(resource_size_t phys_addr, unsigned long size) { return early_memremap(phys_addr, size); } -void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, +void * __init early_memremap_prot(resource_size_t phys_addr, unsigned long size, unsigned long prot_val) { return early_memremap(phys_addr, size); diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 1da4dc46df43..50c9016641a4 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -194,6 +194,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { struct pci_bus *bus; struct pci_root_info *info; + struct pci_host_bridge *host; struct acpi_pci_root_ops *root_ops; int domain = root->segment; int busnum = root->secondary.start; @@ -237,8 +238,17 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; } - pci_bus_size_bridges(bus); - pci_bus_assign_resources(bus); + /* If we must preserve the resource configuration, claim now */ + host = pci_find_host_bridge(bus); + if (host->preserve_config) + pci_bus_claim_resources(bus); + + /* + * Assign whatever was left unassigned. If we didn't claim above, + * this will reassign everything. + */ + pci_assign_unassigned_root_bus_resources(bus); + list_for_each_entry(child, &bus->children, node) pcie_bus_configure_settings(child); } diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index 2726639150bc..5bc9627a6cf9 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -3,7 +3,6 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <linux/kernel.h> -#include <linux/export.h> #include <linux/init.h> #include <linux/acpi.h> #include <linux/types.h> diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index f5c596b211d4..d79fef609194 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -268,7 +268,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) (__pte((x).val)) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 040ac3bad713..14fee64d3e60 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -185,7 +185,7 @@ extern pgd_t kernel_pg_dir[128]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index 73745dc0ec0e..858cbe936f5b 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -169,7 +169,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index 6c732ed3998b..57c1b3e8d60e 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the linux kernel. # -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_MMU_MOTOROLA) := head.o obj-$(CONFIG_SUN3) := sun3-head.o diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index b1bb2c65dd04..bae1abfa6f6b 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -398,7 +398,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile index 85c4d29ef43e..241e466e7333 100644 --- a/arch/microblaze/kernel/Makefile +++ b/arch/microblaze/kernel/Makefile @@ -11,7 +11,7 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_process.o = -pg endif -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y += head.o dma.o exceptions.o \ hw_exception_handler.o irq.o \ diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index bca37ddf974b..41a00fa860c1 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -11,6 +11,7 @@ platform-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon/ platform-$(CONFIG_EYEQ) += mobileye/ platform-$(CONFIG_MIPS_COBALT) += cobalt/ platform-$(CONFIG_MACH_DECSTATION) += dec/ +platform-$(CONFIG_ECONET) += econet/ platform-$(CONFIG_MIPS_GENERIC) += generic/ platform-$(CONFIG_MACH_JAZZ) += jazz/ platform-$(CONFIG_LANTIQ) += lantiq/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index fc0772c1bad4..1e48184ecf1e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -391,6 +391,31 @@ config MACH_DECSTATION otherwise choose R3000. +config ECONET + bool "EcoNet MIPS family" + select BOOT_RAW + select CPU_BIG_ENDIAN + select DEBUG_ZBOOT if DEBUG_KERNEL + select EARLY_PRINTK_8250 + select ECONET_EN751221_TIMER + select SERIAL_8250 + select SERIAL_OF_PLATFORM + select SYS_SUPPORTS_BIG_ENDIAN + select SYS_HAS_CPU_MIPS32_R1 + select SYS_HAS_CPU_MIPS32_R2 + select SYS_HAS_EARLY_PRINTK + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_MIPS16 + select SYS_SUPPORTS_ZBOOT_UART16550 + select USE_GENERIC_EARLY_PRINTK_8250 + select USE_OF + help + EcoNet EN75xx MIPS devices are big endian MIPS machines used + in XPON (fiber) and DSL applications. They have SPI, PCI, USB, + GPIO, and Ethernet, with optional XPON, DSL, and VoIP DSP cores. + Don't confuse these with the Airoha ARM devices sometimes referred + to as "EcoNet", this family is for MIPS based devices only. + config MACH_JAZZ bool "Jazz family of machines" select ARC_MEMORY @@ -617,6 +642,7 @@ config EYEQ select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USE_OF + select HOTPLUG_PARALLEL if SMP help Select this to build a kernel supporting EyeQ SoC from Mobileye. @@ -1020,6 +1046,7 @@ source "arch/mips/ath79/Kconfig" source "arch/mips/bcm47xx/Kconfig" source "arch/mips/bcm63xx/Kconfig" source "arch/mips/bmips/Kconfig" +source "arch/mips/econet/Kconfig" source "arch/mips/generic/Kconfig" source "arch/mips/ingenic/Kconfig" source "arch/mips/jazz/Kconfig" @@ -2287,6 +2314,7 @@ config MIPS_CPS select MIPS_CM select MIPS_CPS_PM if HOTPLUG_CPU select SMP + select HOTPLUG_SMT if HOTPLUG_PARALLEL select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select SYNC_R4K if (CEVT_R4K || CSRC_R4K) select SYS_SUPPORTS_HOTPLUG_CPU diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c index 1b16daaa86ae..411f70ceb762 100644 --- a/arch/mips/alchemy/common/gpiolib.c +++ b/arch/mips/alchemy/common/gpiolib.c @@ -119,9 +119,11 @@ static int alchemy_gpic_get(struct gpio_chip *chip, unsigned int off) return !!au1300_gpio_get_value(off + AU1300_GPIO_BASE); } -static void alchemy_gpic_set(struct gpio_chip *chip, unsigned int off, int v) +static int alchemy_gpic_set(struct gpio_chip *chip, unsigned int off, int v) { au1300_gpio_set_value(off + AU1300_GPIO_BASE, v); + + return 0; } static int alchemy_gpic_dir_input(struct gpio_chip *chip, unsigned int off) @@ -145,7 +147,7 @@ static struct gpio_chip au1300_gpiochip = { .direction_input = alchemy_gpic_dir_input, .direction_output = alchemy_gpic_dir_output, .get = alchemy_gpic_get, - .set = alchemy_gpic_set, + .set_rv = alchemy_gpic_set, .to_irq = alchemy_gpic_gpio_to_irq, .base = AU1300_GPIO_BASE, .ngpio = AU1300_GPIO_NUM, diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index 9cc8fbf218a5..c5617b889b1c 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -764,7 +764,7 @@ void __init board_prom_init(void) snprintf(cfe_version, 12, "%s", (char *) &cfe[4]); } } else { - strcpy(cfe_version, "unknown"); + strscpy(cfe_version, "unknown"); } pr_info("CFE version: %s\n", cfe_version); diff --git a/arch/mips/bcm63xx/gpio.c b/arch/mips/bcm63xx/gpio.c index 5c4a233db55f..e7a53cd0dec5 100644 --- a/arch/mips/bcm63xx/gpio.c +++ b/arch/mips/bcm63xx/gpio.c @@ -35,8 +35,7 @@ static void bcm63xx_gpio_out_low_reg_init(void) static DEFINE_SPINLOCK(bcm63xx_gpio_lock); static u32 gpio_out_low, gpio_out_high; -static void bcm63xx_gpio_set(struct gpio_chip *chip, - unsigned gpio, int val) +static int bcm63xx_gpio_set(struct gpio_chip *chip, unsigned int gpio, int val) { u32 reg; u32 mask; @@ -62,6 +61,8 @@ static void bcm63xx_gpio_set(struct gpio_chip *chip, *v &= ~mask; bcm_gpio_writel(*v, reg); spin_unlock_irqrestore(&bcm63xx_gpio_lock, flags); + + return 0; } static int bcm63xx_gpio_get(struct gpio_chip *chip, unsigned gpio) @@ -130,7 +131,7 @@ static struct gpio_chip bcm63xx_gpio_chip = { .direction_input = bcm63xx_gpio_direction_input, .direction_output = bcm63xx_gpio_direction_output, .get = bcm63xx_gpio_get, - .set = bcm63xx_gpio_set, + .set_rv = bcm63xx_gpio_set, .base = 0, }; diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index db618e72a0c4..529e77a6487c 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -20,6 +20,11 @@ #define PORT(offset) (CKSEG1ADDR(INGENIC_UART_BASE_ADDR) + (4 * offset)) #endif +#ifdef CONFIG_ECONET +#define EN75_UART_BASE 0x1fbf0003 +#define PORT(offset) (CKSEG1ADDR(EN75_UART_BASE) + (4 * (offset))) +#endif + #ifndef IOTYPE #define IOTYPE char #endif diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index ff468439a8c4..7375c6ced82b 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 subdir-$(CONFIG_BMIPS_GENERIC) += brcm subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon +subdir-$(CONFIG_ECONET) += econet subdir-$(CONFIG_EYEQ) += mobileye subdir-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += img subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img diff --git a/arch/mips/boot/dts/econet/Makefile b/arch/mips/boot/dts/econet/Makefile new file mode 100644 index 000000000000..b467d5624e39 --- /dev/null +++ b/arch/mips/boot/dts/econet/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +dtb-$(CONFIG_DTB_ECONET_SMARTFIBER_XP8421_B) += en751221_smartfiber_xp8421-b.dtb diff --git a/arch/mips/boot/dts/econet/en751221.dtsi b/arch/mips/boot/dts/econet/en751221.dtsi new file mode 100644 index 000000000000..66197e73d4f0 --- /dev/null +++ b/arch/mips/boot/dts/econet/en751221.dtsi @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/dts-v1/; + +/ { + compatible = "econet,en751221"; + #address-cells = <1>; + #size-cells = <1>; + + hpt_clock: clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <200000000>; /* 200 MHz */ + }; + + cpus: cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "mips,mips24KEc"; + reg = <0>; + }; + }; + + cpuintc: interrupt-controller { + compatible = "mti,cpu-interrupt-controller"; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + }; + + intc: interrupt-controller@1fb40000 { + compatible = "econet,en751221-intc"; + reg = <0x1fb40000 0x100>; + interrupt-parent = <&cpuintc>; + interrupts = <2>; + + interrupt-controller; + #interrupt-cells = <1>; + econet,shadow-interrupts = <7 2>, <8 3>, <13 12>, <30 29>; + }; + + uart: serial@1fbf0000 { + compatible = "ns16550"; + reg = <0x1fbf0000 0x30>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&intc>; + interrupts = <0>; + /* + * Conversion of baud rate to clock frequency requires a + * computation that is not in the ns16550 driver, so this + * uart is fixed at 115200 baud. + */ + clock-frequency = <1843200>; + }; + + timer_hpt: timer@1fbf0400 { + compatible = "econet,en751221-timer"; + reg = <0x1fbf0400 0x100>; + + interrupt-parent = <&intc>; + interrupts = <30>; + clocks = <&hpt_clock>; + }; +}; diff --git a/arch/mips/boot/dts/econet/en751221_smartfiber_xp8421-b.dts b/arch/mips/boot/dts/econet/en751221_smartfiber_xp8421-b.dts new file mode 100644 index 000000000000..8223c5bce67f --- /dev/null +++ b/arch/mips/boot/dts/econet/en751221_smartfiber_xp8421-b.dts @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/dts-v1/; + +#include "en751221.dtsi" + +/ { + model = "SmartFiber XP8421-B"; + compatible = "smartfiber,xp8421-b", "econet,en751221"; + + memory@0 { + device_type = "memory"; + reg = <0x00000000 0x1c000000>; + }; + + chosen { + stdout-path = "/serial@1fbf0000:115200"; + linux,usable-memory-range = <0x00020000 0x1bfe0000>; + }; +}; diff --git a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts index c7ea4f1c0bb2..6c277ab83d4b 100644 --- a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts +++ b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts @@ -29,6 +29,7 @@ compatible = "loongson,pch-msi-1.0"; reg = <0 0x2ff00000 0 0x8>; interrupt-controller; + #interrupt-cells = <1>; msi-controller; loongson,msi-base-vec = <64>; loongson,msi-num-vecs = <64>; diff --git a/arch/mips/boot/dts/pic32/pic32mzda.dtsi b/arch/mips/boot/dts/pic32/pic32mzda.dtsi index fdc721b414a8..feca35ba56a4 100644 --- a/arch/mips/boot/dts/pic32/pic32mzda.dtsi +++ b/arch/mips/boot/dts/pic32/pic32mzda.dtsi @@ -225,7 +225,7 @@ gpio-ranges = <&pic32_pinctrl 0 144 16>; }; - sdhci: sdhci@1f8ec000 { + sdhci: mmc@1f8ec000 { compatible = "microchip,pic32mzda-sdhci"; reg = <0x1f8ec000 0x100>; interrupts = <191 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/mips/boot/dts/realtek/rtl930x.dtsi b/arch/mips/boot/dts/realtek/rtl930x.dtsi index f2e57ea3a60c..101bab72a95f 100644 --- a/arch/mips/boot/dts/realtek/rtl930x.dtsi +++ b/arch/mips/boot/dts/realtek/rtl930x.dtsi @@ -69,6 +69,39 @@ #size-cells = <0>; status = "disabled"; }; + + mdio_controller: mdio-controller@ca00 { + compatible = "realtek,rtl9301-mdio"; + reg = <0xca00 0x200>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + + mdio0: mdio-bus@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + mdio1: mdio-bus@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + mdio2: mdio-bus@2 { + reg = <2>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + mdio3: mdio-bus@3 { + reg = <3>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + }; }; soc: soc@18000000 { diff --git a/arch/mips/econet/Kconfig b/arch/mips/econet/Kconfig new file mode 100644 index 000000000000..fd69884cc9a8 --- /dev/null +++ b/arch/mips/econet/Kconfig @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: GPL-2.0 +if ECONET + +choice + prompt "EcoNet SoC selection" + default SOC_ECONET_EN751221 + help + Select EcoNet MIPS SoC type. Individual SoCs within a family are + very similar, so is it enough to select the right family, and + then customize to the specific SoC using the device tree only. + + config SOC_ECONET_EN751221 + bool "EN751221 family" + select COMMON_CLK + select ECONET_EN751221_INTC + select IRQ_MIPS_CPU + select SMP + select SMP_UP + select SYS_SUPPORTS_SMP + help + The EN751221 family includes EN7512, RN7513, EN7521, EN7526. + They are based on single core MIPS 34Kc processors. To boot + this kernel, you will need a device tree such as + MIPS_RAW_APPENDED_DTB=y, and a root filesystem. +endchoice + +choice + prompt "Devicetree selection" + default DTB_ECONET_NONE + help + Select the devicetree. + + config DTB_ECONET_NONE + bool "None" + + config DTB_ECONET_SMARTFIBER_XP8421_B + bool "EN751221 SmartFiber XP8421-B" + depends on SOC_ECONET_EN751221 + select BUILTIN_DTB + help + The SmartFiber XP8421-B is a device based on the EN751221 SoC. + It has 512MB of memory and 256MB of NAND flash. This kernel + needs only an appended initramfs to boot. It can be loaded + through XMODEM and booted from memory in the bootloader, or + it can be packed in tclinux.trx format and written to flash. +endchoice + +endif diff --git a/arch/mips/econet/Makefile b/arch/mips/econet/Makefile new file mode 100644 index 000000000000..7e4529e7d3d7 --- /dev/null +++ b/arch/mips/econet/Makefile @@ -0,0 +1,2 @@ + +obj-y := init.o diff --git a/arch/mips/econet/Platform b/arch/mips/econet/Platform new file mode 100644 index 000000000000..ea5616447bcd --- /dev/null +++ b/arch/mips/econet/Platform @@ -0,0 +1,5 @@ +# To address a 7.2MB kernel size limit in the EcoNet SDK bootloader, +# we put the load address well above where the bootloader loads and then use +# zboot. So please set CONFIG_ZBOOT_LOAD_ADDRESS to the address where your +# bootloader actually places the kernel. +load-$(CONFIG_ECONET) += 0xffffffff81000000 diff --git a/arch/mips/econet/init.c b/arch/mips/econet/init.c new file mode 100644 index 000000000000..6f43ffb209cb --- /dev/null +++ b/arch/mips/econet/init.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * EcoNet setup code + * + * Copyright (C) 2025 Caleb James DeLisle <cjd@cjdns.fr> + */ + +#include <linux/init.h> +#include <linux/of_clk.h> +#include <linux/irqchip.h> + +#include <asm/addrspace.h> +#include <asm/io.h> +#include <asm/bootinfo.h> +#include <asm/time.h> +#include <asm/prom.h> +#include <asm/smp-ops.h> +#include <asm/reboot.h> + +#define CR_AHB_RSTCR ((void __iomem *)CKSEG1ADDR(0x1fb00040)) +#define RESET BIT(31) + +#define UART_BASE CKSEG1ADDR(0x1fbf0003) +#define UART_REG_SHIFT 2 + +static void hw_reset(char *command) +{ + iowrite32(RESET, CR_AHB_RSTCR); +} + +/* 1. Bring up early printk. */ +void __init prom_init(void) +{ + setup_8250_early_printk_port(UART_BASE, UART_REG_SHIFT, 0); + _machine_restart = hw_reset; +} + +/* 2. Parse the DT and find memory */ +void __init plat_mem_setup(void) +{ + void *dtb; + + set_io_port_base(KSEG1); + + dtb = get_fdt(); + if (!dtb) + panic("no dtb found"); + + __dt_setup_arch(dtb); + + early_init_dt_scan_memory(); +} + +/* 3. Overload __weak device_tree_init(), add SMP_UP ops */ +void __init device_tree_init(void) +{ + unflatten_and_copy_device_tree(); + + register_up_smp_ops(); +} + +const char *get_system_type(void) +{ + return "EcoNet-EN75xx"; +} + +/* 4. Initialize the IRQ subsystem */ +void __init arch_init_irq(void) +{ + irqchip_init(); +} + +/* 5. Timers */ +void __init plat_time_init(void) +{ + of_clk_init(NULL); + timer_probe(); +} diff --git a/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h index a0d4b752899e..5dbc9b13d15b 100644 --- a/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h +++ b/arch/mips/include/asm/mach-loongson2ef/cs5536/cs5536_pci.h @@ -12,12 +12,32 @@ #ifndef _CS5536_PCI_H #define _CS5536_PCI_H +#include <linux/init.h> #include <linux/types.h> #include <linux/pci_regs.h> extern void cs5536_pci_conf_write4(int function, int reg, u32 value); extern u32 cs5536_pci_conf_read4(int function, int reg); +extern void pci_ehci_write_reg(int reg, u32 value); +extern u32 pci_ehci_read_reg(int reg); + +extern void pci_ide_write_reg(int reg, u32 value); +extern u32 pci_ide_read_reg(int reg); + +extern void pci_acc_write_reg(int reg, u32 value); +extern u32 pci_acc_read_reg(int reg); + +extern void pci_ohci_write_reg(int reg, u32 value); +extern u32 pci_ohci_read_reg(int reg); + +extern void pci_isa_write_bar(int n, u32 value); +extern u32 pci_isa_read_bar(int n); +extern void pci_isa_write_reg(int reg, u32 value); +extern u32 pci_isa_read_reg(int reg); + +extern int __init init_mfgpt_clocksource(void); + #define CS5536_ACC_INTR 9 #define CS5536_IDE_INTR 14 #define CS5536_USB_INTR 11 diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h index ca039b8dcde3..4a098fb10232 100644 --- a/arch/mips/include/asm/mach-loongson2ef/loongson.h +++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h @@ -18,6 +18,9 @@ extern void bonito_irq_init(void); extern void mach_prepare_reboot(void); extern void mach_prepare_shutdown(void); +/* machine-specific PROM functions */ +extern void __init mach_prom_init_machtype(void); + /* environment arguments from bootloader */ extern u32 cpu_clock_freq; extern u32 memsize, highmemsize; @@ -45,6 +48,12 @@ extern void __init mach_init_irq(void); extern void mach_irq_dispatch(unsigned int pending); extern int mach_i8259_irq(void); +/* power management functions */ +extern void setup_wakeup_events(void); +extern int wakeup_loongson(void); +extern void __weak mach_suspend(void); +extern void __weak mach_resume(void); + /* We need this in some places... */ #define delay() ({ \ int x; \ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 4852b005a72d..ae73ecf4c41a 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -534,7 +534,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #endif #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte.pte_low & _PAGE_SWP_EXCLUSIVE; } @@ -551,7 +551,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } #else -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h index 0673d2d0f2e6..5158c802eb65 100644 --- a/arch/mips/include/asm/topology.h +++ b/arch/mips/include/asm/topology.h @@ -16,6 +16,9 @@ #define topology_core_id(cpu) (cpu_core(&cpu_data[cpu])) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) #define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu]) + +extern struct cpumask __cpu_primary_thread_mask; +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) #endif #endif /* __ASM_TOPOLOGY_H */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index ecf3278a32f7..95a1e674fd67 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the Linux/MIPS kernel. # -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y += head.o branch.o cmpxchg.o elf.o entry.o genex.o idle.o irq.o \ process.o prom.o ptrace.o reset.o setup.o signal.o \ diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c index 8c083612df9d..027fb57d0d79 100644 --- a/arch/mips/kernel/gpio_txx9.c +++ b/arch/mips/kernel/gpio_txx9.c @@ -32,14 +32,16 @@ static void txx9_gpio_set_raw(unsigned int offset, int value) __raw_writel(val, &txx9_pioptr->dout); } -static void txx9_gpio_set(struct gpio_chip *chip, unsigned int offset, - int value) +static int txx9_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { unsigned long flags; spin_lock_irqsave(&txx9_gpio_lock, flags); txx9_gpio_set_raw(offset, value); mmiowb(); spin_unlock_irqrestore(&txx9_gpio_lock, flags); + + return 0; } static int txx9_gpio_dir_in(struct gpio_chip *chip, unsigned int offset) @@ -68,7 +70,7 @@ static int txx9_gpio_dir_out(struct gpio_chip *chip, unsigned int offset, static struct gpio_chip txx9_gpio_chip = { .get = txx9_gpio_get, - .set = txx9_gpio_set, + .set_rv = txx9_gpio_set, .direction_input = txx9_gpio_dir_in, .direction_output = txx9_gpio_dir_out, .label = "TXx9", diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index f7107479c7fa..b890d64d352c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -922,11 +922,13 @@ static const struct pt_regs_offset regoffset_table[] = { */ int regs_query_register_offset(const char *name) { - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->offset; - return -EINVAL; + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + + return -EINVAL; } #if defined(CONFIG_32BIT) || defined(CONFIG_MIPS32_O32) @@ -937,7 +939,7 @@ static const struct user_regset mips_regsets[] = { .n = ELF_NGREG, .size = sizeof(unsigned int), .align = sizeof(unsigned int), - .regset_get = gpr32_get, + .regset_get = gpr32_get, .set = gpr32_set, }, [REGSET_DSP] = { @@ -945,7 +947,7 @@ static const struct user_regset mips_regsets[] = { .n = NUM_DSP_REGS + 1, .size = sizeof(u32), .align = sizeof(u32), - .regset_get = dsp32_get, + .regset_get = dsp32_get, .set = dsp32_set, .active = dsp_active, }, @@ -955,7 +957,7 @@ static const struct user_regset mips_regsets[] = { .n = ELF_NFPREG, .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), - .regset_get = fpr_get, + .regset_get = fpr_get, .set = fpr_set, }, [REGSET_FP_MODE] = { @@ -963,7 +965,7 @@ static const struct user_regset mips_regsets[] = { .n = 1, .size = sizeof(int), .align = sizeof(int), - .regset_get = fp_mode_get, + .regset_get = fp_mode_get, .set = fp_mode_set, }, #endif @@ -973,7 +975,7 @@ static const struct user_regset mips_regsets[] = { .n = NUM_FPU_REGS + 1, .size = 16, .align = 16, - .regset_get = msa_get, + .regset_get = msa_get, .set = msa_set, }, #endif @@ -997,7 +999,7 @@ static const struct user_regset mips64_regsets[] = { .n = ELF_NGREG, .size = sizeof(unsigned long), .align = sizeof(unsigned long), - .regset_get = gpr64_get, + .regset_get = gpr64_get, .set = gpr64_set, }, [REGSET_DSP] = { @@ -1005,7 +1007,7 @@ static const struct user_regset mips64_regsets[] = { .n = NUM_DSP_REGS + 1, .size = sizeof(u64), .align = sizeof(u64), - .regset_get = dsp64_get, + .regset_get = dsp64_get, .set = dsp64_set, .active = dsp_active, }, @@ -1015,7 +1017,7 @@ static const struct user_regset mips64_regsets[] = { .n = 1, .size = sizeof(int), .align = sizeof(int), - .regset_get = fp_mode_get, + .regset_get = fp_mode_get, .set = fp_mode_set, }, [REGSET_FPR] = { @@ -1023,7 +1025,7 @@ static const struct user_regset mips64_regsets[] = { .n = ELF_NFPREG, .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), - .regset_get = fpr_get, + .regset_get = fpr_get, .set = fpr_set, }, #endif @@ -1033,7 +1035,7 @@ static const struct user_regset mips64_regsets[] = { .n = NUM_FPU_REGS + 1, .size = 16, .align = 16, - .regset_get = msa_get, + .regset_get = msa_get, .set = msa_set, }, #endif @@ -1351,7 +1353,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs) */ asmlinkage void syscall_trace_leave(struct pt_regs *regs) { - /* + /* * We may come here right after calling schedule_user() * or do_notify_resume(), in which case we can be in RCU * user mode. diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index cc26d56f3ab6..7b0e69af4097 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -236,6 +236,7 @@ static void __init cps_smp_setup(void) /* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */ if (!cl && !c) smp_num_siblings = core_vpes; + cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask); for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) { cpu_set_cluster(&cpu_data[nvpes + v], cl); @@ -368,6 +369,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) cl = cpu_cluster(¤t_cpu_data); c = cpu_core(¤t_cpu_data); cluster_bootcfg = &mips_cps_cluster_bootcfg[cl]; + cpu_smt_set_num_threads(core_vpes, core_vpes); core_bootcfg = &cluster_bootcfg->core_config[c]; bitmap_set(cluster_bootcfg->core_power, cpu_core(¤t_cpu_data), 1); atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(¤t_cpu_data)); diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 39e193cad2b9..4868e79f3b30 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map); cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); +#ifndef CONFIG_HOTPLUG_PARALLEL static DECLARE_COMPLETION(cpu_starting); static DECLARE_COMPLETION(cpu_running); +#endif /* * A logical cpu mask containing only one VPE per core to @@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map; cpumask_t cpu_coherent_mask; +struct cpumask __cpu_primary_thread_mask __read_mostly; + unsigned int smp_max_threads __initdata = UINT_MAX; static int __init early_nosmt(char *s) @@ -367,6 +371,9 @@ asmlinkage void start_secondary(void) * to an option instead of something based on .cputype */ +#ifdef CONFIG_HOTPLUG_PARALLEL + cpuhp_ap_sync_alive(); +#endif calibrate_delay(); cpu_data[cpu].udelay_val = loops_per_jiffy; @@ -376,8 +383,10 @@ asmlinkage void start_secondary(void) cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); +#ifndef CONFIG_HOTPLUG_PARALLEL /* Notify boot CPU that we're starting & ready to sync counters */ complete(&cpu_starting); +#endif synchronise_count_slave(cpu); @@ -386,11 +395,13 @@ asmlinkage void start_secondary(void) calculate_cpu_foreign_map(); +#ifndef CONFIG_HOTPLUG_PARALLEL /* * Notify boot CPU that we're up & online and it can safely return * from __cpu_up */ complete(&cpu_running); +#endif /* * irq will be enabled in ->smp_finish(), enabling it too early @@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void) set_cpu_online(0, true); } +#ifdef CONFIG_HOTPLUG_PARALLEL +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle) +{ + return mp_ops->boot_secondary(cpu, tidle); +} +#else int __cpu_up(unsigned int cpu, struct task_struct *tidle) { int err; @@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) wait_for_completion(&cpu_running); return 0; } +#endif #ifdef CONFIG_PROFILING /* Not really SMP stuff ... */ diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 737d0d4fdcd3..2b67c44adab9 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <linux/elf.h> #include <linux/seq_file.h> +#include <linux/string.h> #include <linux/syscalls.h> #include <linux/moduleloader.h> #include <linux/interrupt.h> @@ -582,7 +583,7 @@ static int vpe_elfload(struct vpe *v) struct module mod; /* so we can re-use the relocations code */ memset(&mod, 0, sizeof(struct module)); - strcpy(mod.name, "VPE loader"); + strscpy(mod.name, "VPE loader"); hdr = (Elf_Ehdr *) v->pbuffer; len = v->plen; diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index ea6ebfea4a67..0e47cd59b6cb 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -105,13 +105,15 @@ static int rb532_gpio_get(struct gpio_chip *chip, unsigned offset) /* * Set output GPIO level */ -static void rb532_gpio_set(struct gpio_chip *chip, - unsigned offset, int value) +static int rb532_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct rb532_gpio_chip *gpch; gpch = gpiochip_get_data(chip); rb532_set_bit(value, offset, gpch->regbase + GPIOD); + + return 0; } /* @@ -162,7 +164,7 @@ static struct rb532_gpio_chip rb532_gpio_chip[] = { .direction_input = rb532_gpio_direction_input, .direction_output = rb532_gpio_direction_output, .get = rb532_gpio_get, - .set = rb532_gpio_set, + .set_rv = rb532_gpio_set, .to_irq = rb532_gpio_to_irq, .base = 0, .ngpio = 32, diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 1e67fecd466e..0586ca7668b4 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -603,8 +603,8 @@ static int txx9_iocled_get(struct gpio_chip *chip, unsigned int offset) return !!(data->cur_val & (1 << offset)); } -static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset, - int value) +static int txx9_iocled_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct txx9_iocled_data *data = gpiochip_get_data(chip); unsigned long flags; @@ -616,6 +616,8 @@ static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset, writeb(data->cur_val, data->mmioaddr); mmiowb(); spin_unlock_irqrestore(&txx9_iocled_lock, flags); + + return 0; } static int txx9_iocled_dir_in(struct gpio_chip *chip, unsigned int offset) @@ -653,7 +655,7 @@ void __init txx9_iocled_init(unsigned long baseaddr, if (!iocled->mmioaddr) goto out_free; iocled->chip.get = txx9_iocled_get; - iocled->chip.set = txx9_iocled_set; + iocled->chip.set_rv = txx9_iocled_set; iocled->chip.direction_input = txx9_iocled_dir_in; iocled->chip.direction_output = txx9_iocled_dir_out; iocled->chip.label = "iocled"; diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index fb4c493aaffa..69d4593f64fe 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -27,6 +27,7 @@ endif # offsets. cflags-vdso := $(ccflags-vdso) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ + $(filter -std=%,$(KBUILD_CFLAGS)) \ -O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \ -mrelax-pic-calls $(call cc-option, -mexplicit-relocs) \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index e98578e27e26..844dce55569f 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -259,7 +259,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/nios2/kernel/Makefile b/arch/nios2/kernel/Makefile index 78a913181fa1..4dce965a7b73 100644 --- a/arch/nios2/kernel/Makefile +++ b/arch/nios2/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the nios2 linux kernel. # -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-y += head.o obj-y += cpuinfo.o diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 71bfb8c8c482..5bd6463bd514 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -411,7 +411,7 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile index e4c7d9bdd598..58e6a1b525b7 100644 --- a/arch/openrisc/kernel/Makefile +++ b/arch/openrisc/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the linux kernel. # -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y := head.o setup.o or32_ksyms.o process.o dma.o \ traps.o time.o irq.o entry.o ptrace.o signal.o \ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 80f5e2a28413..1a86a4370b29 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -425,7 +425,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 5ab0467be70a..d5055ba33722 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for arch/parisc/kernel # -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y := head.o cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ syscall.o entry.o sys_parisc.o firmware.o \ diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index c4e4d2a9b460..b7eac4e56019 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -4,7 +4,7 @@ / { #size-cells = <0x02>; #address-cells = <0x02>; - model-name = "microwatt"; + model = "microwatt"; compatible = "microwatt-soc"; aliases { diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index e09b37d7489d..a89cb3139ca8 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -6,6 +6,7 @@ */ /dts-v1/; +#include <dt-bindings/interrupt-controller/irq.h> / { compatible = "fsl,mpc8315erdb"; @@ -358,6 +359,15 @@ interrupt-parent = <&ipic>; fsl,mpc8313-wakeup-timer = <>m1>; }; + + gpio: gpio-controller@c00 { + compatible = "fsl,mpc8314-gpio"; + reg = <0xc00 0x100>; + interrupts = <74 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&ipic>; + gpio-controller; + #gpio-cells = <2>; + }; }; pci0: pci@e0008500 { diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 42c3af90d1f0..92d21c6faf1e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -365,7 +365,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 6ed93e290c2f..a2ddcbb3fcb9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -693,7 +693,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE)); } diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 8d1f0b7062eb..7d6b9e5b286e 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -286,7 +286,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 02897f4b0dbf..b891910fce8a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -183,7 +183,7 @@ /* * Used to name C functions called from asm */ -#ifdef CONFIG_PPC_KERNEL_PCREL +#if defined(__powerpc64__) && defined(CONFIG_PPC_KERNEL_PCREL) #define CFUNC(name) name@notoc #else #define CFUNC(name) name diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h index 2c145da3b774..b5211e413829 100644 --- a/arch/powerpc/include/uapi/asm/ioctls.h +++ b/arch/powerpc/include/uapi/asm/ioctls.h @@ -23,10 +23,10 @@ #define TCSETSW _IOW('t', 21, struct termios) #define TCSETSF _IOW('t', 22, struct termios) -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) +#define TCGETA 0x40147417 /* _IOR('t', 23, struct termio) */ +#define TCSETA 0x80147418 /* _IOW('t', 24, struct termio) */ +#define TCSETAW 0x80147419 /* _IOW('t', 25, struct termio) */ +#define TCSETAF 0x8014741c /* _IOW('t', 28, struct termio) */ #define TCSBRK _IO('t', 29) #define TCXONC _IO('t', 30) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9d1ab3971694..fb2b95267022 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -126,7 +126,7 @@ obj-$(CONFIG_PPC_BOOK3S_32) += head_book3s_32.o obj-$(CONFIG_44x) += head_44x.o obj-$(CONFIG_PPC_8xx) += head_8xx.o obj-$(CONFIG_PPC_85xx) += head_85xx.o -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 83fe99861eb1..ca7f7bb2b478 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1509,6 +1509,8 @@ int eeh_pe_configure(struct eeh_pe *pe) /* Invalid PE ? */ if (!pe) return -ENODEV; + else + ret = eeh_ops->configure_bridge(pe); return ret; } diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index e8824f933326..8834dfe9d727 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -53,7 +53,7 @@ ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WAR ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) CC32FLAGS := -m32 -CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc +CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel ifdef CONFIG_CC_IS_CLANG # This flag is supported by clang for 64-bit but not 32-bit so it will cause # an unused command line flag warning for this file. diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 791d1942a058..3401b96be475 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -628,7 +628,7 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu) static void kvmppc_watchdog_func(struct timer_list *t) { - struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.wdt_timer); + struct kvm_vcpu *vcpu = timer_container_of(vcpu, t, arch.wdt_timer); u32 tsr, new_tsr; int final; diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 0b6365d85d11..dc6f75d3ac6e 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EINVAL; } + /* + * Map complete page to the paste address. So the user + * space should pass 0ULL to the offset parameter. + */ + if (vma->vm_pgoff) { + pr_debug("Page offset unsupported to map paste address\n"); + return -EINVAL; + } + /* Ensure instance has an open send window */ if (!txwin) { pr_err("No send window open?\n"); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index a0ae58636e10..02474e27df9b 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -359,7 +359,8 @@ static irqreturn_t kw_i2c_irq(int irq, void *dev_id) static void kw_i2c_timeout(struct timer_list *t) { - struct pmac_i2c_host_kw *host = from_timer(host, t, timeout_timer); + struct pmac_i2c_host_kw *host = timer_container_of(host, t, + timeout_timer); unsigned long flags; spin_lock_irqsave(&host->lock, flags); diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 4ac9808e55a4..2ea30b343354 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) { struct memtrace_entry *ent = filp->private_data; + unsigned long ent_nrpages = ent->size >> PAGE_SHIFT; + unsigned long vma_nrpages = vma_pages(vma); - if (ent->size < vma->vm_end - vma->vm_start) + /* The requested page offset should be within object's page count */ + if (vma->vm_pgoff >= ent_nrpages) return -EINVAL; - if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + /* The requested mapping range should remain within the bounds */ + if (vma_nrpages > ent_nrpages - vma->vm_pgoff) return -EINVAL; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bbec87b79309..1c5544401530 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -63,13 +63,15 @@ config RISCV select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_CFI_CLANG + # clang >= 17: https://github.com/llvm/llvm-project/commit/62fa708ceb027713b386c7e0efda994f8bdc27e2 + select ARCH_SUPPORTS_CFI_CLANG if CLANG_VERSION >= 170000 select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000 + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS if 64BIT && MMU select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU select ARCH_SUPPORTS_RT @@ -96,9 +98,11 @@ config RISCV select CLONE_BACKWARDS select COMMON_CLK select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND + select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_SUPPORT select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE) select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE + select FUNCTION_ALIGNMENT_8B if DYNAMIC_FTRACE_WITH_CALL_OPS select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS_BROADCAST if SMP @@ -143,6 +147,7 @@ config RISCV select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU + select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if 64BIT && MMU select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS @@ -150,13 +155,15 @@ config RISCV select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) - select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select FUNCTION_ALIGNMENT_4B if HAVE_DYNAMIC_FTRACE && RISCV_ISA_C + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS + select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG) select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_FUNCTION_GRAPH_FREGS - select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION + select HAVE_FUNCTION_TRACER if !XIP_KERNEL && HAVE_DYNAMIC_FTRACE select HAVE_EBPF_JIT if MMU select HAVE_GUP_FAST if MMU select HAVE_FUNCTION_ARG_ACCESS_API @@ -218,6 +225,7 @@ config RISCV select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU + select VDSO_GETRANDOM if HAVE_GENERIC_VDSO select USER_STACKTRACE_SUPPORT select ZONE_DMA32 if 64BIT @@ -236,6 +244,7 @@ config CLANG_SUPPORTS_DYNAMIC_FTRACE config GCC_SUPPORTS_DYNAMIC_FTRACE def_bool CC_IS_GCC depends on $(cc-option,-fpatchable-function-entry=8) + depends on CC_HAS_MIN_FUNCTION_ALIGNMENT || !RISCV_ISA_C config HAVE_SHADOW_CALL_STACK def_bool $(cc-option,-fsanitize=shadow-call-stack) @@ -664,12 +673,12 @@ config RISCV_ISA_V_PREEMPTIVE default y help Usually, in-kernel SIMD routines are run with preemption disabled. - Functions which envoke long running SIMD thus must yield core's + Functions which invoke long running SIMD thus must yield the core's vector unit to prevent blocking other tasks for too long. - This config allows kernel to run SIMD without explicitly disable - preemption. Enabling this config will result in higher memory - consumption due to the allocation of per-task's kernel Vector context. + This config allows the kernel to run SIMD without explicitly disabling + preemption. Enabling this config will result in higher memory consumption + due to the allocation of per-task's kernel Vector context. config RISCV_ISA_ZAWRS bool "Zawrs extension support for more efficient busy waiting" @@ -842,6 +851,21 @@ config RISCV_ISA_ZICBOZ If you don't know what to do here, say Y. +config RISCV_ISA_ZICBOP + bool "Zicbop extension support for cache block prefetch" + depends on MMU + depends on RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the ZICBOP + extension (Cache Block Prefetch Operations) and enable its + usage. + + The Zicbop extension can be used to prefetch cache blocks for + read/write fetch. + + If you don't know what to do here, say Y. + config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI def_bool y # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc @@ -1171,8 +1195,8 @@ config CMDLINE_FALLBACK config CMDLINE_EXTEND bool "Extend bootloader kernel arguments" help - The command-line arguments provided during boot will be - appended to the built-in command line. This is useful in + The built-in command line will be appended to the command- + line arguments provided during boot. This is useful in cases where the provided arguments are insufficient and you don't want to or cannot modify them. diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor index b096548fe0ff..e14f26368963 100644 --- a/arch/riscv/Kconfig.vendor +++ b/arch/riscv/Kconfig.vendor @@ -16,6 +16,19 @@ config RISCV_ISA_VENDOR_EXT_ANDES If you don't know what to do here, say Y. endmenu +menu "SiFive" +config RISCV_ISA_VENDOR_EXT_SIFIVE + bool "SiFive vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here if you want to disable all SiFive vendor extension + support. This will cause any SiFive vendor extensions that are + requested by hardware probing to be ignored. + + If you don't know what to do here, say Y. +endmenu + menu "T-Head" config RISCV_ISA_VENDOR_EXT_THEAD bool "T-Head vendor extension support" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 539d2aef5cab..df57654a615e 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -15,9 +15,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) LDFLAGS_vmlinux += --no-relax KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY ifeq ($(CONFIG_RISCV_ISA_C),y) - CC_FLAGS_FTRACE := -fpatchable-function-entry=4 + CC_FLAGS_FTRACE := -fpatchable-function-entry=8,4 else - CC_FLAGS_FTRACE := -fpatchable-function-entry=2 + CC_FLAGS_FTRACE := -fpatchable-function-entry=4,2 endif endif diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index eea825ee58e1..fe8bd8afb418 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -18,12 +18,9 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y -CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_ARCH_MICROCHIP=y CONFIG_ARCH_SIFIVE=y @@ -182,6 +179,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_AXP20X=y CONFIG_REGULATOR_GPIO=y CONFIG_MEDIA_SUPPORT=m +CONFIG_MEDIA_PLATFORM_SUPPORT=y CONFIG_VIDEO_CADENCE_CSI2RX=m CONFIG_DRM=m CONFIG_DRM_RADEON=m @@ -297,25 +295,7 @@ CONFIG_DEFAULT_SECURITY_DAC=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_PAGEALLOC=y -CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_DEBUG_VM=y -CONFIG_DEBUG_VM_PGFLAGS=y -CONFIG_DEBUG_MEMORY_INIT=y -CONFIG_DEBUG_PER_CPU_MAPS=y -CONFIG_SOFTLOCKUP_DETECTOR=y -CONFIG_WQ_WATCHDOG=y -CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_RWSEMS=y -CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_PLIST=y -CONFIG_DEBUG_SG=y -# CONFIG_RCU_TRACE is not set -CONFIG_RCU_EQS_DEBUG=y -# CONFIG_FTRACE is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_MEMTEST=y diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index bfc8ea5f9319..a9988bf21ec8 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -12,7 +12,7 @@ long long __ashlti3(long long a, int b); #ifdef CONFIG_RISCV_ISA_V #ifdef CONFIG_MMU -asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n, bool enable_sum); #endif /* CONFIG_MMU */ void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1, diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index e1d9bf1deca6..b8c5726d86ac 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -14,11 +14,6 @@ #include <asm/cmpxchg.h> #include <asm/fence.h> -#define nop() __asm__ __volatile__ ("nop") -#define __nops(n) ".rept " #n "\nnop\n.endr\n" -#define nops(n) __asm__ __volatile__ (__nops(n)) - - /* These barriers need to enforce ordering on both devices or memory. */ #define __mb() RISCV_FENCE(iorw, iorw) #define __rmb() RISCV_FENCE(ir, ir) diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index b59ffeb668d6..6086b38d5427 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -85,6 +85,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) extern unsigned int riscv_cbom_block_size; extern unsigned int riscv_cboz_block_size; +extern unsigned int riscv_cbop_block_size; void riscv_init_cbo_blocksizes(void); #ifdef CONFIG_RISCV_DMA_NONCOHERENT diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 2ec119eb147b..0b749e710216 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -13,6 +13,7 @@ #include <asm/hwcap.h> #include <asm/insn-def.h> #include <asm/cpufeature-macros.h> +#include <asm/processor.h> #define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \ swap_append, r, p, n) \ @@ -37,6 +38,7 @@ \ __asm__ __volatile__ ( \ prepend \ + PREFETCHW_ASM(%5) \ "0: lr.w %0, %2\n" \ " and %1, %0, %z4\n" \ " or %1, %1, %z3\n" \ @@ -44,7 +46,7 @@ " bnez %1, 0b\n" \ sc_append \ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" (__newx), "rJ" (~__mask) \ + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ : "memory"); \ \ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index f56b409361fb..fbd0e4306c93 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -67,11 +67,11 @@ void __init riscv_user_isa_enable(void); _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) bool __init check_unaligned_access_emulated_all_cpus(void); +void unaligned_access_init(void); +int cpu_online_unaligned_access_init(unsigned int cpu); #if defined(CONFIG_RISCV_SCALAR_MISALIGNED) -void check_unaligned_access_emulated(struct work_struct *work __always_unused); void unaligned_emulation_finish(void); bool unaligned_ctl_available(void); -DECLARE_PER_CPU(long, misaligned_access_speed); #else static inline bool unaligned_ctl_available(void) { @@ -79,6 +79,16 @@ static inline bool unaligned_ctl_available(void) } #endif +#if defined(CONFIG_RISCV_MISALIGNED) +DECLARE_PER_CPU(long, misaligned_access_speed); +bool misaligned_traps_can_delegate(void); +#else +static inline bool misaligned_traps_can_delegate(void) +{ + return false; +} +#endif + bool __init check_vector_unaligned_access_emulated_all_cpus(void); #if defined(CONFIG_RISCV_VECTOR_MISALIGNED) void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused); diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index d627f63ee289..22ebea3c2b26 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -20,10 +20,9 @@ extern void *return_address(unsigned int level); #define ftrace_return_address(n) return_address(n) void _mcount(void); -static inline unsigned long ftrace_call_adjust(unsigned long addr) -{ - return addr; -} +unsigned long ftrace_call_adjust(unsigned long addr); +unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip); +#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip) /* * Let's do like x86/arm64 and ignore the compat syscalls. @@ -57,12 +56,21 @@ struct dyn_arch_ftrace { * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to * return address (original pc + 4) * + * The first 2 instructions for each tracable function is compiled to 2 nop + * instructions. Then, the kernel initializes the first instruction to auipc at + * boot time (<ftrace disable>). The second instruction is patched to jalr to + * start the trace. + * + *<Image>: + * 0: nop + * 4: nop + * *<ftrace enable>: - * 0: auipc t0/ra, 0x? - * 4: jalr t0/ra, ?(t0/ra) + * 0: auipc t0, 0x? + * 4: jalr t0, ?(t0) * *<ftrace disable>: - * 0: nop + * 0: auipc t0, 0x? * 4: nop * * Dynamic ftrace generates probes to call sites, so we must deal with @@ -75,10 +83,9 @@ struct dyn_arch_ftrace { #define AUIPC_OFFSET_MASK (0xfffff000) #define AUIPC_PAD (0x00001000) #define JALR_SHIFT 20 -#define JALR_RA (0x000080e7) -#define AUIPC_RA (0x00000097) #define JALR_T0 (0x000282e7) #define AUIPC_T0 (0x00000297) +#define JALR_RANGE (JALR_SIGN_MASK - 1) #define to_jalr_t0(offset) \ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0) @@ -96,26 +103,14 @@ do { \ call[1] = to_jalr_t0(offset); \ } while (0) -#define to_jalr_ra(offset) \ - (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA) - -#define to_auipc_ra(offset) \ - ((offset & JALR_SIGN_MASK) ? \ - (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \ - ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA)) - -#define make_call_ra(caller, callee, call) \ -do { \ - unsigned int offset = \ - (unsigned long) (callee) - (unsigned long) (caller); \ - call[0] = to_auipc_ra(offset); \ - call[1] = to_jalr_ra(offset); \ -} while (0) - /* - * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here. + * Only the jalr insn in the auipc+jalr is patched, so we make it 4 + * bytes here. */ -#define MCOUNT_INSN_SIZE 8 +#define MCOUNT_INSN_SIZE 4 +#define MCOUNT_AUIPC_SIZE 4 +#define MCOUNT_JALR_SIZE 4 +#define MCOUNT_NOP4_SIZE 4 #ifndef __ASSEMBLY__ struct dyn_ftrace; @@ -135,6 +130,9 @@ struct __arch_ftrace_regs { unsigned long sp; unsigned long s0; unsigned long t1; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + unsigned long direct_tramp; +#endif union { unsigned long args[8]; struct { @@ -146,6 +144,13 @@ struct __arch_ftrace_regs { unsigned long a5; unsigned long a6; unsigned long a7; +#ifdef CONFIG_CC_IS_CLANG + unsigned long t2; + unsigned long t3; + unsigned long t4; + unsigned long t5; + unsigned long t6; +#endif }; }; }; @@ -221,10 +226,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); #define ftrace_graph_func ftrace_graph_func +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { arch_ftrace_regs(fregs)->t1 = addr; } +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index e3cbf203cdde..affd63e11b0a 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -105,6 +105,7 @@ #define RISCV_ISA_EXT_ZVFBFWMA 96 #define RISCV_ISA_EXT_ZAAMO 97 #define RISCV_ISA_EXT_ZALRSC 98 +#define RISCV_ISA_EXT_ZICBOP 99 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 1f690fea0e03..7fe0a379474a 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 12 +#define RISCV_HWPROBE_MAX_KEY 13 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { @@ -22,6 +22,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key) case RISCV_HWPROBE_KEY_IMA_EXT_0: case RISCV_HWPROBE_KEY_CPUPERF_0: case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0: return true; } diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h index e0b319af3681..8927a6ea1127 100644 --- a/arch/riscv/include/asm/image.h +++ b/arch/riscv/include/asm/image.h @@ -30,6 +30,8 @@ RISCV_HEADER_VERSION_MINOR) #ifndef __ASSEMBLY__ +#define riscv_image_flag_field(flags, field)\ + (((flags) >> field##_SHIFT) & field##_MASK) /** * struct riscv_image_header - riscv kernel image header * @code0: Executable code diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 71060a2f838e..d5adbaec1d01 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -18,6 +18,13 @@ #define INSN_I_RD_SHIFT 7 #define INSN_I_OPCODE_SHIFT 0 +#define INSN_S_SIMM7_SHIFT 25 +#define INSN_S_RS2_SHIFT 20 +#define INSN_S_RS1_SHIFT 15 +#define INSN_S_FUNC3_SHIFT 12 +#define INSN_S_SIMM5_SHIFT 7 +#define INSN_S_OPCODE_SHIFT 0 + #ifdef __ASSEMBLY__ #ifdef CONFIG_AS_HAS_INSN @@ -30,6 +37,10 @@ .insn i \opcode, \func3, \rd, \rs1, \simm12 .endm + .macro insn_s, opcode, func3, rs2, simm12, rs1 + .insn s \opcode, \func3, \rs2, \simm12(\rs1) + .endm + #else #include <asm/gpr-num.h> @@ -51,10 +62,20 @@ (\simm12 << INSN_I_SIMM12_SHIFT)) .endm + .macro insn_s, opcode, func3, rs2, simm12, rs1 + .4byte ((\opcode << INSN_S_OPCODE_SHIFT) | \ + (\func3 << INSN_S_FUNC3_SHIFT) | \ + (.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) | \ + (.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) | \ + ((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) | \ + (((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT)) + .endm + #endif #define __INSN_R(...) insn_r __VA_ARGS__ #define __INSN_I(...) insn_i __VA_ARGS__ +#define __INSN_S(...) insn_s __VA_ARGS__ #else /* ! __ASSEMBLY__ */ @@ -66,6 +87,9 @@ #define __INSN_I(opcode, func3, rd, rs1, simm12) \ ".insn i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" +#define __INSN_S(opcode, func3, rs2, simm12, rs1) \ + ".insn s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n" + #else #include <linux/stringify.h> @@ -92,12 +116,26 @@ " (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \ " .endm\n" +#define DEFINE_INSN_S \ + __DEFINE_ASM_GPR_NUMS \ +" .macro insn_s, opcode, func3, rs2, simm12, rs1\n" \ +" .4byte ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |" \ +" (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |" \ +" (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \ +" (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \ +" ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \ +" (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \ +" .endm\n" + #define UNDEFINE_INSN_R \ " .purgem insn_r\n" #define UNDEFINE_INSN_I \ " .purgem insn_i\n" +#define UNDEFINE_INSN_S \ +" .purgem insn_s\n" + #define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ DEFINE_INSN_R \ "insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \ @@ -108,6 +146,11 @@ "insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \ UNDEFINE_INSN_I +#define __INSN_S(opcode, func3, rs2, simm12, rs1) \ + DEFINE_INSN_S \ + "insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \ + UNDEFINE_INSN_S + #endif #endif /* ! __ASSEMBLY__ */ @@ -120,6 +163,10 @@ __INSN_I(RV_##opcode, RV_##func3, RV_##rd, \ RV_##rs1, RV_##simm12) +#define INSN_S(opcode, func3, rs2, simm12, rs1) \ + __INSN_S(RV_##opcode, RV_##func3, RV_##rs2, \ + RV_##simm12, RV_##rs1) + #define RV_OPCODE(v) __ASM_STR(v) #define RV_FUNC3(v) __ASM_STR(v) #define RV_FUNC7(v) __ASM_STR(v) @@ -133,6 +180,7 @@ #define RV___RS2(v) __RV_REG(v) #define RV_OPCODE_MISC_MEM RV_OPCODE(15) +#define RV_OPCODE_OP_IMM RV_OPCODE(19) #define RV_OPCODE_SYSTEM RV_OPCODE(115) #define HFENCE_VVMA(vaddr, asid) \ @@ -196,6 +244,18 @@ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(4)) +#define PREFETCH_I(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0), \ + SIMM12((offset) & 0xfe0), RS1(base)) + +#define PREFETCH_R(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1), \ + SIMM12((offset) & 0xfe0), RS1(base)) + +#define PREFETCH_W(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \ + SIMM12((offset) & 0xfe0), RS1(base)) + #define RISCV_PAUSE ".4byte 0x100000f" #define ZAWRS_WRS_NTO ".4byte 0x00d00073" #define ZAWRS_WRS_STO ".4byte 0x01d00073" @@ -203,4 +263,10 @@ #define RISCV_INSN_NOP4 _AC(0x00000013, U) +#ifndef __ASSEMBLY__ +#define nop() __asm__ __volatile__ ("nop") +#define __nops(n) ".rept " #n "\nnop\n.endr\n" +#define nops(n) __asm__ __volatile__ (__nops(n)) +#endif + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 2b56769cb530..b9ee8346cc8c 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -56,6 +56,7 @@ extern riscv_kexec_method riscv_kexec_norelocate; #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops elf_kexec_ops; +extern const struct kexec_file_ops image_kexec_ops; struct purgatory_info; int arch_kexec_apply_relocations_add(struct purgatory_info *pi, @@ -67,6 +68,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, struct kimage; int arch_kimage_file_post_load_cleanup(struct kimage *image); #define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup + +int load_extra_segments(struct kimage *image, unsigned long kernel_start, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len); #endif #endif diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h index 3b643b9efc07..5acce285e56e 100644 --- a/arch/riscv/include/asm/kvm_aia.h +++ b/arch/riscv/include/asm/kvm_aia.h @@ -87,6 +87,9 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); extern struct kvm_device_ops kvm_riscv_aia_device_ops; +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu); +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu); int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu); @@ -161,7 +164,6 @@ void kvm_riscv_aia_destroy_vm(struct kvm *kvm); int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, void __iomem **hgei_va, phys_addr_t *hgei_pa); void kvm_riscv_aia_free_hgei(int cpu, int hgei); -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable); void kvm_riscv_aia_enable(void); void kvm_riscv_aia_disable(void); diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 85cfebc32e4c..bcbf8b1ec115 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -306,6 +306,9 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; } +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 188fadc1c21f..7de05db7d3bd 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -184,7 +184,7 @@ static inline int pud_none(pud_t pud) static inline int pud_bad(pud_t pud) { - return !pud_present(pud); + return !pud_present(pud) || (pud_val(pud) & _PAGE_LEAF); } #define pud_leaf pud_leaf @@ -399,6 +399,7 @@ p4d_t *p4d_offset(pgd_t *pgd, unsigned long address); #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pte_devmap(pte_t pte); static inline pte_t pmd_pte(pmd_t pmd); +static inline pte_t pud_pte(pud_t pud); static inline int pmd_devmap(pmd_t pmd) { @@ -407,7 +408,7 @@ static inline int pmd_devmap(pmd_t pmd) static inline int pud_devmap(pud_t pud) { - return 0; + return pte_devmap(pud_pte(pud)); } static inline int pgd_devmap(pgd_t pgd) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index f19240fd018e..5bd5aae60d53 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -900,6 +900,103 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define pmdp_collapse_flush pmdp_collapse_flush extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); + +static inline pud_t pud_wrprotect(pud_t pud) +{ + return pte_pud(pte_wrprotect(pud_pte(pud))); +} + +static inline int pud_trans_huge(pud_t pud) +{ + return pud_leaf(pud); +} + +static inline int pud_dirty(pud_t pud) +{ + return pte_dirty(pud_pte(pud)); +} + +static inline pud_t pud_mkyoung(pud_t pud) +{ + return pte_pud(pte_mkyoung(pud_pte(pud))); +} + +static inline pud_t pud_mkold(pud_t pud) +{ + return pte_pud(pte_mkold(pud_pte(pud))); +} + +static inline pud_t pud_mkdirty(pud_t pud) +{ + return pte_pud(pte_mkdirty(pud_pte(pud))); +} + +static inline pud_t pud_mkclean(pud_t pud) +{ + return pte_pud(pte_mkclean(pud_pte(pud))); +} + +static inline pud_t pud_mkwrite(pud_t pud) +{ + return pte_pud(pte_mkwrite_novma(pud_pte(pud))); +} + +static inline pud_t pud_mkhuge(pud_t pud) +{ + return pud; +} + +static inline pud_t pud_mkdevmap(pud_t pud) +{ + return pte_pud(pte_mkdevmap(pud_pte(pud))); +} + +static inline int pudp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, + pud_t entry, int dirty) +{ + return ptep_set_access_flags(vma, address, (pte_t *)pudp, pud_pte(entry), dirty); +} + +static inline int pudp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp) +{ + return ptep_test_and_clear_young(vma, address, (pte_t *)pudp); +} + +static inline int pud_young(pud_t pud) +{ + return pte_young(pud_pte(pud)); +} + +static inline void update_mmu_cache_pud(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp) +{ + pte_t *ptep = (pte_t *)pudp; + + update_mmu_cache(vma, address, ptep); +} + +static inline pud_t pudp_establish(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, pud_t pud) +{ + page_table_check_pud_set(vma->vm_mm, pudp, pud); + return __pud(atomic_long_xchg((atomic_long_t *)pudp, pud_val(pud))); +} + +static inline pud_t pud_mkinvalid(pud_t pud) +{ + return __pud(pud_val(pud) & ~(_PAGE_PRESENT | _PAGE_PROT_NONE)); +} + +extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, + pud_t *pudp); + +static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) +{ + return pte_pud(pte_modify(pud_pte(pud), newprot)); +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* @@ -931,7 +1028,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } @@ -978,7 +1075,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) */ #ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) -#define TASK_SIZE_MAX LONG_MAX #ifdef CONFIG_COMPAT #define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 5f56eb9d114a..24d3af4d3807 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -13,6 +13,9 @@ #include <vdso/processor.h> #include <asm/ptrace.h> +#include <asm/insn-def.h> +#include <asm/alternative-macros.h> +#include <asm/hwcap.h> #define arch_get_mmap_end(addr, len, flags) \ ({ \ @@ -52,7 +55,6 @@ #endif #ifndef __ASSEMBLY__ -#include <linux/cpumask.h> struct task_struct; struct pt_regs; @@ -79,6 +81,10 @@ struct pt_regs; * Thus, the task does not own preempt_v. Any use of Vector will have to * save preempt_v, if dirty, and fallback to non-preemptible kernel-mode * Vector. + * - bit 29: The thread voluntarily calls schedule() while holding an active + * preempt_v. All preempt_v context should be dropped in such case because + * V-regs are caller-saved. Only sstatus.VS=ON is persisted across a + * schedule() call. * - bit 30: The in-kernel preempt_v context is saved, and requries to be * restored when returning to the context that owns the preempt_v. * - bit 31: The in-kernel preempt_v context is dirty, as signaled by the @@ -93,6 +99,7 @@ struct pt_regs; #define RISCV_PREEMPT_V 0x00000100 #define RISCV_PREEMPT_V_DIRTY 0x80000000 #define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000 +#define RISCV_PREEMPT_V_IN_SCHEDULE 0x20000000 /* CPU-specific state of a task */ struct thread_struct { @@ -103,6 +110,7 @@ struct thread_struct { struct __riscv_d_ext_state fstate; unsigned long bad_cause; unsigned long envcfg; + unsigned long sum; u32 riscv_v_flags; u32 vstate_ctrl; struct __riscv_v_ext_state vstate; @@ -136,6 +144,27 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, #define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) +#define PREFETCH_ASM(x) \ + ALTERNATIVE(__nops(1), PREFETCH_R(x, 0), 0, \ + RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP) + +#define PREFETCHW_ASM(x) \ + ALTERNATIVE(__nops(1), PREFETCH_W(x, 0), 0, \ + RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP) + +#ifdef CONFIG_RISCV_ISA_ZICBOP +#define ARCH_HAS_PREFETCH +static inline void prefetch(const void *x) +{ + __asm__ __volatile__(PREFETCH_ASM(%0) : : "r" (x) : "memory"); +} + +#define ARCH_HAS_PREFETCHW +static inline void prefetchw(const void *x) +{ + __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory"); +} +#endif /* CONFIG_RISCV_ISA_ZICBOP */ /* Do necessary setup to start up a newly executed thread. */ extern void start_thread(struct pt_regs *regs, diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 2910231977cb..a7dc0e330757 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -175,7 +175,7 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, return 0; } -static inline int regs_irqs_disabled(struct pt_regs *regs) +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { return !(regs->status & SR_PIE); } diff --git a/arch/riscv/include/asm/runtime-const.h b/arch/riscv/include/asm/runtime-const.h index 451fd76b8811..d766e2b9e6df 100644 --- a/arch/riscv/include/asm/runtime-const.h +++ b/arch/riscv/include/asm/runtime-const.h @@ -206,7 +206,7 @@ static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, u addi_insn_mask &= 0x07fff; } - if (lower_immediate & 0x00000fff) { + if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) { /* replace upper 12 bits of addi with lower 12 bits of val */ addi_insn &= addi_insn_mask; addi_insn |= (lower_immediate & 0x00000fff) << 20; diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 3d250824178b..341e74238aa0 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -35,6 +35,7 @@ enum sbi_ext_id { SBI_EXT_DBCN = 0x4442434E, SBI_EXT_STA = 0x535441, SBI_EXT_NACL = 0x4E41434C, + SBI_EXT_FWFT = 0x46574654, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -402,6 +403,33 @@ enum sbi_ext_nacl_feature { #define SBI_NACL_SHMEM_SRET_X(__i) ((__riscv_xlen / 8) * (__i)) #define SBI_NACL_SHMEM_SRET_X_LAST 31 +/* SBI function IDs for FW feature extension */ +#define SBI_EXT_FWFT_SET 0x0 +#define SBI_EXT_FWFT_GET 0x1 + +enum sbi_fwft_feature_t { + SBI_FWFT_MISALIGNED_EXC_DELEG = 0x0, + SBI_FWFT_LANDING_PAD = 0x1, + SBI_FWFT_SHADOW_STACK = 0x2, + SBI_FWFT_DOUBLE_TRAP = 0x3, + SBI_FWFT_PTE_AD_HW_UPDATING = 0x4, + SBI_FWFT_POINTER_MASKING_PMLEN = 0x5, + SBI_FWFT_LOCAL_RESERVED_START = 0x6, + SBI_FWFT_LOCAL_RESERVED_END = 0x3fffffff, + SBI_FWFT_LOCAL_PLATFORM_START = 0x40000000, + SBI_FWFT_LOCAL_PLATFORM_END = 0x7fffffff, + + SBI_FWFT_GLOBAL_RESERVED_START = 0x80000000, + SBI_FWFT_GLOBAL_RESERVED_END = 0xbfffffff, + SBI_FWFT_GLOBAL_PLATFORM_START = 0xc0000000, + SBI_FWFT_GLOBAL_PLATFORM_END = 0xffffffff, +}; + +#define SBI_FWFT_PLATFORM_FEATURE_BIT BIT(30) +#define SBI_FWFT_GLOBAL_FEATURE_BIT BIT(31) + +#define SBI_FWFT_SET_FLAG_LOCK BIT(0) + /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 @@ -419,6 +447,11 @@ enum sbi_ext_nacl_feature { #define SBI_ERR_ALREADY_STARTED -7 #define SBI_ERR_ALREADY_STOPPED -8 #define SBI_ERR_NO_SHMEM -9 +#define SBI_ERR_INVALID_STATE -10 +#define SBI_ERR_BAD_RANGE -11 +#define SBI_ERR_TIMEOUT -12 +#define SBI_ERR_IO -13 +#define SBI_ERR_DENIED_LOCKED -14 extern unsigned long sbi_spec_version; struct sbiret { @@ -470,6 +503,23 @@ int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, unsigned long asid); long sbi_probe_extension(int ext); +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags); +int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature, + unsigned long value, unsigned long flags); +/** + * sbi_fwft_set_online_cpus() - Set a feature on all online cpus + * @feature: The feature to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +static inline int sbi_fwft_set_online_cpus(u32 feature, unsigned long value, + unsigned long flags) +{ + return sbi_fwft_set_cpumask(cpu_online_mask, feature, value, flags); +} + /* Check if current SBI specification version is 0.1 or not */ static inline int sbi_spec_is_0_1(void) { @@ -503,11 +553,21 @@ static inline int sbi_err_map_linux_errno(int err) case SBI_SUCCESS: return 0; case SBI_ERR_DENIED: + case SBI_ERR_DENIED_LOCKED: return -EPERM; case SBI_ERR_INVALID_PARAM: + case SBI_ERR_INVALID_STATE: return -EINVAL; + case SBI_ERR_BAD_RANGE: + return -ERANGE; case SBI_ERR_INVALID_ADDRESS: return -EFAULT; + case SBI_ERR_NO_SHMEM: + return -ENOMEM; + case SBI_ERR_TIMEOUT: + return -ETIMEDOUT; + case SBI_ERR_IO: + return -EIO; case SBI_ERR_NOT_SUPPORTED: case SBI_ERR_FAILURE: default: diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index ce0dd0fed764..1a20dd746a49 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -56,6 +56,8 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void flush_pud_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); #endif bool arch_tlbbatch_should_defer(struct mm_struct *mm); diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index fee56b0c8058..b88a6218b7f2 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -62,6 +62,19 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne __asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory") /* + * This is the smallest unsigned integer type that can fit a value + * (up to 'long long') + */ +#define __inttype(x) __typeof__( \ + __typefits(x, char, \ + __typefits(x, short, \ + __typefits(x, int, \ + __typefits(x, long, 0ULL))))) + +#define __typefits(x, type, not) \ + __builtin_choose_expr(sizeof(x) <= sizeof(type), (unsigned type)0, not) + +/* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is * the address at which the program should continue. No registers are @@ -83,27 +96,59 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne * call. */ -#define __get_user_asm(insn, x, ptr, err) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_user_asm(insn, x, ptr, label) \ + asm_goto_output( \ + "1:\n" \ + " " insn " %0, %1\n" \ + _ASM_EXTABLE_UACCESS_ERR(1b, %l2, %0) \ + : "=&r" (x) \ + : "m" (*(ptr)) : : label) +#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ +#define __get_user_asm(insn, x, ptr, label) \ do { \ - __typeof__(x) __x; \ + long __gua_err = 0; \ __asm__ __volatile__ ( \ "1:\n" \ " " insn " %1, %2\n" \ "2:\n" \ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \ - : "+r" (err), "=&r" (__x) \ + : "+r" (__gua_err), "=&r" (x) \ : "m" (*(ptr))); \ - (x) = __x; \ + if (__gua_err) \ + goto label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ #ifdef CONFIG_64BIT -#define __get_user_8(x, ptr, err) \ - __get_user_asm("ld", x, ptr, err) +#define __get_user_8(x, ptr, label) \ + __get_user_asm("ld", x, ptr, label) #else /* !CONFIG_64BIT */ -#define __get_user_8(x, ptr, err) \ + +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_user_8(x, ptr, label) \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u32 __lo, __hi; \ + asm_goto_output( \ + "1:\n" \ + " lw %0, %2\n" \ + "2:\n" \ + " lw %1, %3\n" \ + _ASM_EXTABLE_UACCESS_ERR(1b, %l4, %0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, %l4, %0) \ + : "=&r" (__lo), "=r" (__hi) \ + : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]) \ + : : label); \ + (x) = (__typeof__(x))((__typeof__((x) - (x)))( \ + (((u64)__hi << 32) | __lo))); \ +} while (0) +#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ +#define __get_user_8(x, ptr, label) \ +do { \ + u32 __user *__ptr = (u32 __user *)(ptr); \ + u32 __lo, __hi; \ + long __gu8_err = 0; \ __asm__ __volatile__ ( \ "1:\n" \ " lw %1, %3\n" \ @@ -112,35 +157,62 @@ do { \ "3:\n" \ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \ - : "+r" (err), "=&r" (__lo), "=r" (__hi) \ + : "+r" (__gu8_err), "=&r" (__lo), "=r" (__hi) \ : "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \ - if (err) \ + if (__gu8_err) { \ __hi = 0; \ - (x) = (__typeof__(x))((__typeof__((x)-(x)))( \ + goto label; \ + } \ + (x) = (__typeof__(x))((__typeof__((x) - (x)))( \ (((u64)__hi << 32) | __lo))); \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + #endif /* CONFIG_64BIT */ -#define __get_user_nocheck(x, __gu_ptr, __gu_err) \ +unsigned long __must_check __asm_copy_to_user_sum_enabled(void __user *to, + const void *from, unsigned long n); +unsigned long __must_check __asm_copy_from_user_sum_enabled(void *to, + const void __user *from, unsigned long n); + +#define __get_user_nocheck(x, __gu_ptr, label) \ do { \ + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ + !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \ + if (__asm_copy_from_user_sum_enabled(&(x), __gu_ptr, sizeof(*__gu_ptr))) \ + goto label; \ + break; \ + } \ switch (sizeof(*__gu_ptr)) { \ case 1: \ - __get_user_asm("lb", (x), __gu_ptr, __gu_err); \ + __get_user_asm("lb", (x), __gu_ptr, label); \ break; \ case 2: \ - __get_user_asm("lh", (x), __gu_ptr, __gu_err); \ + __get_user_asm("lh", (x), __gu_ptr, label); \ break; \ case 4: \ - __get_user_asm("lw", (x), __gu_ptr, __gu_err); \ + __get_user_asm("lw", (x), __gu_ptr, label); \ break; \ case 8: \ - __get_user_8((x), __gu_ptr, __gu_err); \ + __get_user_8((x), __gu_ptr, label); \ break; \ default: \ BUILD_BUG(); \ } \ } while (0) +#define __get_user_error(x, ptr, err) \ +do { \ + __label__ __gu_failed; \ + \ + __get_user_nocheck(x, ptr, __gu_failed); \ + err = 0; \ + break; \ +__gu_failed: \ + x = 0; \ + err = -EFAULT; \ +} while (0) + /** * __get_user: - Get a simple variable from user space, with less checking. * @x: Variable to store result. @@ -165,13 +237,16 @@ do { \ ({ \ const __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \ long __gu_err = 0; \ + __typeof__(x) __gu_val; \ \ __chk_user_ptr(__gu_ptr); \ \ __enable_user_access(); \ - __get_user_nocheck(x, __gu_ptr, __gu_err); \ + __get_user_error(__gu_val, __gu_ptr, __gu_err); \ __disable_user_access(); \ \ + (x) = __gu_val; \ + \ __gu_err; \ }) @@ -201,61 +276,73 @@ do { \ ((x) = (__force __typeof__(x))0, -EFAULT); \ }) -#define __put_user_asm(insn, x, ptr, err) \ +#define __put_user_asm(insn, x, ptr, label) \ do { \ __typeof__(*(ptr)) __x = x; \ - __asm__ __volatile__ ( \ + asm goto( \ "1:\n" \ - " " insn " %z2, %1\n" \ - "2:\n" \ - _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \ - : "+r" (err), "=m" (*(ptr)) \ - : "rJ" (__x)); \ + " " insn " %z0, %1\n" \ + _ASM_EXTABLE(1b, %l2) \ + : : "rJ" (__x), "m"(*(ptr)) : : label); \ } while (0) #ifdef CONFIG_64BIT -#define __put_user_8(x, ptr, err) \ - __put_user_asm("sd", x, ptr, err) +#define __put_user_8(x, ptr, label) \ + __put_user_asm("sd", x, ptr, label) #else /* !CONFIG_64BIT */ -#define __put_user_8(x, ptr, err) \ +#define __put_user_8(x, ptr, label) \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u64 __x = (__typeof__((x)-(x)))(x); \ - __asm__ __volatile__ ( \ + asm goto( \ "1:\n" \ - " sw %z3, %1\n" \ + " sw %z0, %2\n" \ "2:\n" \ - " sw %z4, %2\n" \ - "3:\n" \ - _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \ - _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \ - : "+r" (err), \ - "=m" (__ptr[__LSW]), \ - "=m" (__ptr[__MSW]) \ - : "rJ" (__x), "rJ" (__x >> 32)); \ + " sw %z1, %3\n" \ + _ASM_EXTABLE(1b, %l4) \ + _ASM_EXTABLE(2b, %l4) \ + : : "rJ" (__x), "rJ" (__x >> 32), \ + "m" (__ptr[__LSW]), \ + "m" (__ptr[__MSW]) : : label); \ } while (0) #endif /* CONFIG_64BIT */ -#define __put_user_nocheck(x, __gu_ptr, __pu_err) \ +#define __put_user_nocheck(x, __gu_ptr, label) \ do { \ + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ + !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) { \ + __inttype(x) ___val = (__inttype(x))x; \ + if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(___val), sizeof(*__gu_ptr))) \ + goto label; \ + break; \ + } \ switch (sizeof(*__gu_ptr)) { \ case 1: \ - __put_user_asm("sb", (x), __gu_ptr, __pu_err); \ + __put_user_asm("sb", (x), __gu_ptr, label); \ break; \ case 2: \ - __put_user_asm("sh", (x), __gu_ptr, __pu_err); \ + __put_user_asm("sh", (x), __gu_ptr, label); \ break; \ case 4: \ - __put_user_asm("sw", (x), __gu_ptr, __pu_err); \ + __put_user_asm("sw", (x), __gu_ptr, label); \ break; \ case 8: \ - __put_user_8((x), __gu_ptr, __pu_err); \ + __put_user_8((x), __gu_ptr, label); \ break; \ default: \ BUILD_BUG(); \ } \ } while (0) +#define __put_user_error(x, ptr, err) \ +do { \ + __label__ err_label; \ + __put_user_nocheck(x, ptr, err_label); \ + break; \ +err_label: \ + (err) = -EFAULT; \ +} while (0) + /** * __put_user: - Write a simple value into user space, with less checking. * @x: Value to copy to user space. @@ -286,7 +373,7 @@ do { \ __chk_user_ptr(__gu_ptr); \ \ __enable_user_access(); \ - __put_user_nocheck(__val, __gu_ptr, __pu_err); \ + __put_user_error(__val, __gu_ptr, __pu_err); \ __disable_user_access(); \ \ __pu_err; \ @@ -351,23 +438,45 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) } #define __get_kernel_nofault(dst, src, type, err_label) \ -do { \ - long __kr_err = 0; \ - \ - __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ - if (unlikely(__kr_err)) \ - goto err_label; \ -} while (0) + __get_user_nocheck(*((type *)(dst)), (type *)(src), err_label) #define __put_kernel_nofault(dst, src, type, err_label) \ -do { \ - long __kr_err = 0; \ - \ - __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ - if (unlikely(__kr_err)) \ - goto err_label; \ + __put_user_nocheck(*((type *)(src)), (type *)(dst), err_label) + +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr, len))) + return 0; + __enable_user_access(); + return 1; +} +#define user_access_begin user_access_begin +#define user_access_end __disable_user_access + +static inline unsigned long user_access_save(void) { return 0UL; } +static inline void user_access_restore(unsigned long enabled) { } + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. + */ +#define unsafe_put_user(x, ptr, label) \ + __put_user_nocheck(x, (ptr), label) + +#define unsafe_get_user(x, ptr, label) do { \ + __inttype(*(ptr)) __gu_val; \ + __get_user_nocheck(__gu_val, (ptr), label); \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) +#define unsafe_copy_to_user(_dst, _src, _len, label) \ + if (__asm_copy_to_user_sum_enabled(_dst, _src, _len)) \ + goto label; + +#define unsafe_copy_from_user(_dst, _src, _len, label) \ + if (__asm_copy_from_user_sum_enabled(_dst, _src, _len)) \ + goto label; + #else /* CONFIG_MMU */ #include <asm-generic/uaccess.h> #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..c6d66895c1f5 --- /dev/null +++ b/arch/riscv/include/asm/vdso/getrandom.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved. + */ +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> + +static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) +{ + register long ret asm("a0"); + register long nr asm("a7") = __NR_getrandom; + register void *buffer asm("a0") = _buffer; + register size_t len asm("a1") = _len; + register unsigned int flags asm("a2") = _flags; + + asm volatile ("ecall\n" + : "=r" (ret) + : "r" (nr), "r" (buffer), "r" (len), "r" (flags) + : "memory"); + + return ret; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index e8a83f55be2b..b61786d43c20 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -120,6 +120,11 @@ static __always_inline void riscv_v_disable(void) csr_clear(CSR_SSTATUS, SR_VS); } +static __always_inline bool riscv_v_is_on(void) +{ + return !!(csr_read(CSR_SSTATUS) & SR_VS); +} + static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) { asm volatile ( @@ -200,11 +205,11 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, THEAD_VSETVLI_T4X0E8M8D1 THEAD_VSB_V_V0T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V8T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V16T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( @@ -236,11 +241,11 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ THEAD_VSETVLI_T4X0E8M8D1 THEAD_VLB_V_V0T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V8T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V16T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( @@ -366,6 +371,11 @@ static inline void __switch_to_vector(struct task_struct *prev, struct pt_regs *regs; if (riscv_preempt_v_started(prev)) { + if (riscv_v_is_on()) { + WARN_ON(prev->thread.riscv_v_flags & RISCV_V_CTX_DEPTH_MASK); + riscv_v_disable(); + prev->thread.riscv_v_flags |= RISCV_PREEMPT_V_IN_SCHEDULE; + } if (riscv_preempt_v_dirty(prev)) { __riscv_v_vstate_save(&prev->thread.kernel_vstate, prev->thread.kernel_vstate.datap); @@ -376,10 +386,16 @@ static inline void __switch_to_vector(struct task_struct *prev, riscv_v_vstate_save(&prev->thread.vstate, regs); } - if (riscv_preempt_v_started(next)) - riscv_preempt_v_set_restore(next); - else + if (riscv_preempt_v_started(next)) { + if (next->thread.riscv_v_flags & RISCV_PREEMPT_V_IN_SCHEDULE) { + next->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_IN_SCHEDULE; + riscv_v_enable(); + } else { + riscv_preempt_v_set_restore(next); + } + } else { riscv_v_vstate_set_restore(next, task_pt_regs(next)); + } } void riscv_v_vstate_ctrl_init(struct task_struct *tsk); diff --git a/arch/riscv/include/asm/vendor_extensions/sifive.h b/arch/riscv/include/asm/vendor_extensions/sifive.h new file mode 100644 index 000000000000..ac00e500361c --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/sifive.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_H + +#include <asm/vendor_extensions.h> + +#include <linux/types.h> + +#define RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD 0 +#define RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ 1 +#define RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF 2 +#define RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ 3 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive; + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h new file mode 100644 index 000000000000..90a61abd033c --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/sifive_hwprobe.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_HWPROBE_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_SIFIVE_HWPROBE_H + +#include <linux/cpumask.h> + +#include <uapi/asm/hwprobe.h> + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE +void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus); +#else +static inline void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + pair->value = 0; +} +#endif + +#endif diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 3c2fce939673..aaf6ad970499 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -81,6 +81,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZICBOM (1ULL << 55) #define RISCV_HWPROBE_EXT_ZAAMO (1ULL << 56) #define RISCV_HWPROBE_EXT_ZALRSC (1ULL << 57) +#define RISCV_HWPROBE_EXT_ZABHA (1ULL << 58) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -104,6 +105,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 #define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11 #define RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE 12 +#define RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0 13 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/include/uapi/asm/vendor/sifive.h b/arch/riscv/include/uapi/asm/vendor/sifive.h new file mode 100644 index 000000000000..9f3278a4b298 --- /dev/null +++ b/arch/riscv/include/uapi/asm/vendor/sifive.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD (1 << 0) +#define RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ (1 << 1) +#define RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF (1 << 2) +#define RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ (1 << 3) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f7480c9c6f8d..c7b542573407 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -43,7 +43,7 @@ CFLAGS_sbi_ecall.o += -D__NO_FORTIFY endif endif -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-y += head.o obj-y += soc.o @@ -107,7 +107,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o -obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o +obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o kexec_image.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 16490755304e..6e8c0d6feae9 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -34,6 +34,7 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]); OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]); OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); + OFFSET(TASK_THREAD_SUM, task_struct, thread.sum); OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); @@ -346,6 +347,10 @@ void asm_offsets(void) offsetof(struct task_struct, thread.s[11]) - offsetof(struct task_struct, thread.ra) ); + DEFINE(TASK_THREAD_SUM_RA, + offsetof(struct task_struct, thread.sum) + - offsetof(struct task_struct, thread.ra) + ); DEFINE(TASK_THREAD_F0_F0, offsetof(struct task_struct, thread.fstate.f[0]) @@ -493,6 +498,12 @@ void asm_offsets(void) DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); OFFSET(STACKFRAME_FP, stackframe, fp); OFFSET(STACKFRAME_RA, stackframe, ra); +#ifdef CONFIG_FUNCTION_TRACER + DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func)); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ +#endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN)); @@ -501,6 +512,13 @@ void asm_offsets(void) DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0)); DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1)); +#ifdef CONFIG_CC_IS_CLANG + DEFINE(FREGS_T2, offsetof(struct __arch_ftrace_regs, t2)); + DEFINE(FREGS_T3, offsetof(struct __arch_ftrace_regs, t3)); + DEFINE(FREGS_T4, offsetof(struct __arch_ftrace_regs, t4)); + DEFINE(FREGS_T5, offsetof(struct __arch_ftrace_regs, t5)); + DEFINE(FREGS_T6, offsetof(struct __arch_ftrace_regs, t6)); +#endif DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0)); DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1)); DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2)); diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index e6fbaaf54956..87d655944803 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -18,10 +18,10 @@ const struct cpu_operations cpu_ops_sbi; /* * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can - * be invoked from multiple threads in parallel. Define a per cpu data + * be invoked from multiple threads in parallel. Define an array of boot data * to handle that. */ -static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); +static struct sbi_hart_boot_data boot_data[NR_CPUS]; static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) @@ -67,7 +67,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) unsigned long boot_addr = __pa_symbol(secondary_start_sbi); unsigned long hartid = cpuid_to_hartid_map(cpuid); unsigned long hsm_data; - struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid); + struct sbi_hart_boot_data *bdata = &boot_data[cpuid]; /* Make sure tidle is updated */ smp_mb(); diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 2054f6c4b0ae..743d53415572 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -32,6 +32,7 @@ #define NUM_ALPHA_EXTS ('z' - 'a' + 1) static bool any_cpu_has_zicboz; +static bool any_cpu_has_zicbop; static bool any_cpu_has_zicbom; unsigned long elf_hwcap __read_mostly; @@ -119,6 +120,21 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, return 0; } +static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!riscv_cbop_block_size) { + pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n"); + return -EINVAL; + } + if (!is_power_of_2(riscv_cbop_block_size)) { + pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + any_cpu_has_zicbop = true; + return 0; +} + static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap) { @@ -442,6 +458,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate), __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate), __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), @@ -1112,6 +1129,10 @@ void __init riscv_user_isa_enable(void) current->thread.envcfg |= ENVCFG_CBCFE; else if (any_cpu_has_zicbom) pr_warn("Zicbom disabled as it is unavailable on some harts\n"); + + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) && + any_cpu_has_zicbop) + pr_warn("Zicbop disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c deleted file mode 100644 index e783a72d051f..000000000000 --- a/arch/riscv/kernel/elf_kexec.c +++ /dev/null @@ -1,485 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Load ELF vmlinux file for the kexec_file_load syscall. - * - * Copyright (C) 2021 Huawei Technologies Co, Ltd. - * - * Author: Liao Chang (liaochang1@huawei.com) - * - * Based on kexec-tools' kexec-elf-riscv.c, heavily modified - * for kernel. - */ - -#define pr_fmt(fmt) "kexec_image: " fmt - -#include <linux/elf.h> -#include <linux/kexec.h> -#include <linux/slab.h> -#include <linux/of.h> -#include <linux/libfdt.h> -#include <linux/types.h> -#include <linux/memblock.h> -#include <linux/vmalloc.h> -#include <asm/setup.h> - -int arch_kimage_file_post_load_cleanup(struct kimage *image) -{ - kvfree(image->arch.fdt); - image->arch.fdt = NULL; - - vfree(image->elf_headers); - image->elf_headers = NULL; - image->elf_headers_sz = 0; - - return kexec_image_post_load_cleanup_default(image); -} - -static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, - struct kexec_elf_info *elf_info, unsigned long old_pbase, - unsigned long new_pbase) -{ - int i; - int ret = 0; - size_t size; - struct kexec_buf kbuf; - const struct elf_phdr *phdr; - - kbuf.image = image; - - for (i = 0; i < ehdr->e_phnum; i++) { - phdr = &elf_info->proghdrs[i]; - if (phdr->p_type != PT_LOAD) - continue; - - size = phdr->p_filesz; - if (size > phdr->p_memsz) - size = phdr->p_memsz; - - kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; - kbuf.bufsz = size; - kbuf.buf_align = phdr->p_align; - kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; - kbuf.memsz = phdr->p_memsz; - kbuf.top_down = false; - ret = kexec_add_buffer(&kbuf); - if (ret) - break; - } - - return ret; -} - -/* - * Go through the available phsyical memory regions and find one that hold - * an image of the specified size. - */ -static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, - struct elfhdr *ehdr, struct kexec_elf_info *elf_info, - unsigned long *old_pbase, unsigned long *new_pbase) -{ - int i; - int ret; - struct kexec_buf kbuf; - const struct elf_phdr *phdr; - unsigned long lowest_paddr = ULONG_MAX; - unsigned long lowest_vaddr = ULONG_MAX; - - for (i = 0; i < ehdr->e_phnum; i++) { - phdr = &elf_info->proghdrs[i]; - if (phdr->p_type != PT_LOAD) - continue; - - if (lowest_paddr > phdr->p_paddr) - lowest_paddr = phdr->p_paddr; - - if (lowest_vaddr > phdr->p_vaddr) - lowest_vaddr = phdr->p_vaddr; - } - - kbuf.image = image; - kbuf.buf_min = lowest_paddr; - kbuf.buf_max = ULONG_MAX; - - /* - * Current riscv boot protocol requires 2MB alignment for - * RV64 and 4MB alignment for RV32 - * - */ - kbuf.buf_align = PMD_SIZE; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); - kbuf.top_down = false; - ret = arch_kexec_locate_mem_hole(&kbuf); - if (!ret) { - *old_pbase = lowest_paddr; - *new_pbase = kbuf.mem; - image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; - } - return ret; -} - -#ifdef CONFIG_CRASH_DUMP -static int get_nr_ram_ranges_callback(struct resource *res, void *arg) -{ - unsigned int *nr_ranges = arg; - - (*nr_ranges)++; - return 0; -} - -static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) -{ - struct crash_mem *cmem = arg; - - cmem->ranges[cmem->nr_ranges].start = res->start; - cmem->ranges[cmem->nr_ranges].end = res->end; - cmem->nr_ranges++; - - return 0; -} - -static int prepare_elf_headers(void **addr, unsigned long *sz) -{ - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - - nr_ranges = 1; /* For exclusion of crashkernel region */ - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - - cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); - if (!cmem) - return -ENOMEM; - - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; -} - -static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, - unsigned long cmdline_len) -{ - int elfcorehdr_strlen; - char *cmdline_ptr; - - cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); - if (!cmdline_ptr) - return NULL; - - elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", - image->elf_load_addr); - - if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { - pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); - kfree(cmdline_ptr); - return NULL; - } - - memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); - /* Ensure it's nul terminated */ - cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; - return cmdline_ptr; -} -#endif - -static void *elf_kexec_load(struct kimage *image, char *kernel_buf, - unsigned long kernel_len, char *initrd, - unsigned long initrd_len, char *cmdline, - unsigned long cmdline_len) -{ - int ret; - void *fdt; - unsigned long old_kernel_pbase = ULONG_MAX; - unsigned long new_kernel_pbase = 0UL; - unsigned long initrd_pbase = 0UL; - unsigned long kernel_start; - struct elfhdr ehdr; - struct kexec_buf kbuf; - struct kexec_elf_info elf_info; - char *modified_cmdline = NULL; - - ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); - if (ret) - return ERR_PTR(ret); - - ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, - &old_kernel_pbase, &new_kernel_pbase); - if (ret) - goto out; - kernel_start = image->start; - - /* Add the kernel binary to the image */ - ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, - old_kernel_pbase, new_kernel_pbase); - if (ret) - goto out; - - kbuf.image = image; - kbuf.buf_min = new_kernel_pbase + kernel_len; - kbuf.buf_max = ULONG_MAX; - -#ifdef CONFIG_CRASH_DUMP - /* Add elfcorehdr */ - if (image->type == KEXEC_TYPE_CRASH) { - void *headers; - unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); - if (ret) { - pr_err("Preparing elf core header failed\n"); - goto out; - } - - kbuf.buffer = headers; - kbuf.bufsz = headers_sz; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.memsz = headers_sz; - kbuf.buf_align = ELF_CORE_HEADER_ALIGN; - kbuf.top_down = true; - - ret = kexec_add_buffer(&kbuf); - if (ret) { - vfree(headers); - goto out; - } - image->elf_headers = headers; - image->elf_load_addr = kbuf.mem; - image->elf_headers_sz = headers_sz; - - kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - image->elf_load_addr, kbuf.bufsz, kbuf.memsz); - - /* Setup cmdline for kdump kernel case */ - modified_cmdline = setup_kdump_cmdline(image, cmdline, - cmdline_len); - if (!modified_cmdline) { - pr_err("Setting up cmdline for kdump kernel failed\n"); - ret = -EINVAL; - goto out; - } - cmdline = modified_cmdline; - } -#endif - -#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY - /* Add purgatory to the image */ - kbuf.top_down = true; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_load_purgatory(image, &kbuf); - if (ret) { - pr_err("Error loading purgatory ret=%d\n", ret); - goto out; - } - kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); - - ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", - &kernel_start, - sizeof(kernel_start), 0); - if (ret) - pr_err("Error update purgatory ret=%d\n", ret); -#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ - - /* Add the initrd to the image */ - if (initrd != NULL) { - kbuf.buffer = initrd; - kbuf.bufsz = kbuf.memsz = initrd_len; - kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = true; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_add_buffer(&kbuf); - if (ret) - goto out; - initrd_pbase = kbuf.mem; - kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); - } - - /* Add the DTB to the image */ - fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, - initrd_len, cmdline, 0); - if (!fdt) { - pr_err("Error setting up the new device tree.\n"); - ret = -EINVAL; - goto out; - } - - fdt_pack(fdt); - kbuf.buffer = fdt; - kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); - kbuf.buf_align = PAGE_SIZE; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.top_down = true; - ret = kexec_add_buffer(&kbuf); - if (ret) { - pr_err("Error add DTB kbuf ret=%d\n", ret); - goto out_free_fdt; - } - /* Cache the fdt buffer address for memory cleanup */ - image->arch.fdt = fdt; - kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); - goto out; - -out_free_fdt: - kvfree(fdt); -out: - kfree(modified_cmdline); - kexec_free_elf_info(&elf_info); - return ret ? ERR_PTR(ret) : NULL; -} - -#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) -#define RISCV_IMM_BITS 12 -#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) -#define RISCV_CONST_HIGH_PART(x) \ - (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) -#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) - -#define ENCODE_ITYPE_IMM(x) \ - (RV_X(x, 0, 12) << 20) -#define ENCODE_BTYPE_IMM(x) \ - ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ - (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) -#define ENCODE_UTYPE_IMM(x) \ - (RV_X(x, 12, 20) << 12) -#define ENCODE_JTYPE_IMM(x) \ - ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ - (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) -#define ENCODE_CBTYPE_IMM(x) \ - ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ - (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) -#define ENCODE_CJTYPE_IMM(x) \ - ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ - (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ - (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) -#define ENCODE_UJTYPE_IMM(x) \ - (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ - (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) -#define ENCODE_UITYPE_IMM(x) \ - (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) - -#define CLEAN_IMM(type, x) \ - ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) - -int arch_kexec_apply_relocations_add(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab) -{ - const char *strtab, *name, *shstrtab; - const Elf_Shdr *sechdrs; - Elf64_Rela *relas; - int i, r_type; - - /* String & section header string table */ - sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; - strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; - shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; - - relas = (void *)pi->ehdr + relsec->sh_offset; - - for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { - const Elf_Sym *sym; /* symbol to relocate */ - unsigned long addr; /* final location after relocation */ - unsigned long val; /* relocated symbol value */ - unsigned long sec_base; /* relocated symbol value */ - void *loc; /* tmp location to modify */ - - sym = (void *)pi->ehdr + symtab->sh_offset; - sym += ELF64_R_SYM(relas[i].r_info); - - if (sym->st_name) - name = strtab + sym->st_name; - else - name = shstrtab + sechdrs[sym->st_shndx].sh_name; - - loc = pi->purgatory_buf; - loc += section->sh_offset; - loc += relas[i].r_offset; - - if (sym->st_shndx == SHN_ABS) - sec_base = 0; - else if (sym->st_shndx >= pi->ehdr->e_shnum) { - pr_err("Invalid section %d for symbol %s\n", - sym->st_shndx, name); - return -ENOEXEC; - } else - sec_base = pi->sechdrs[sym->st_shndx].sh_addr; - - val = sym->st_value; - val += sec_base; - val += relas[i].r_addend; - - addr = section->sh_addr + relas[i].r_offset; - - r_type = ELF64_R_TYPE(relas[i].r_info); - - switch (r_type) { - case R_RISCV_BRANCH: - *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | - ENCODE_BTYPE_IMM(val - addr); - break; - case R_RISCV_JAL: - *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | - ENCODE_JTYPE_IMM(val - addr); - break; - /* - * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I - * sym is expected to be next to R_RISCV_PCREL_HI20 - * in purgatory relsec. Handle it like R_RISCV_CALL - * sym, instead of searching the whole relsec. - */ - case R_RISCV_PCREL_HI20: - case R_RISCV_CALL_PLT: - case R_RISCV_CALL: - *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | - ENCODE_UJTYPE_IMM(val - addr); - break; - case R_RISCV_RVC_BRANCH: - *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | - ENCODE_CBTYPE_IMM(val - addr); - break; - case R_RISCV_RVC_JUMP: - *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | - ENCODE_CJTYPE_IMM(val - addr); - break; - case R_RISCV_ADD16: - *(u16 *)loc += val; - break; - case R_RISCV_SUB16: - *(u16 *)loc -= val; - break; - case R_RISCV_ADD32: - *(u32 *)loc += val; - break; - case R_RISCV_SUB32: - *(u32 *)loc -= val; - break; - /* It has been applied by R_RISCV_PCREL_HI20 sym */ - case R_RISCV_PCREL_LO12_I: - case R_RISCV_ALIGN: - case R_RISCV_RELAX: - break; - case R_RISCV_64: - *(u64 *)loc = val; - break; - default: - pr_err("Unknown rela relocation: %d\n", r_type); - return -ENOEXEC; - } - } - return 0; -} - -const struct kexec_file_ops elf_kexec_ops = { - .probe = kexec_elf_probe, - .load = elf_kexec_load, -}; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 0fb338000c6d..75656afa2d6b 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -401,9 +401,18 @@ SYM_FUNC_START(__switch_to) REG_S s9, TASK_THREAD_S9_RA(a3) REG_S s10, TASK_THREAD_S10_RA(a3) REG_S s11, TASK_THREAD_S11_RA(a3) + + /* save the user space access flag */ + csrr s0, CSR_STATUS + REG_S s0, TASK_THREAD_SUM_RA(a3) + /* Save the kernel shadow call stack pointer */ scs_save_current /* Restore context from next->thread */ + REG_L s0, TASK_THREAD_SUM_RA(a4) + li s1, SR_SUM + and s0, s0, s1 + csrs CSR_STATUS, s0 REG_L ra, TASK_THREAD_RA_RA(a4) REG_L sp, TASK_THREAD_SP_RA(a4) REG_L s0, TASK_THREAD_S0_RA(a4) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 674dcdfae7a1..8d18d6727f0f 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -8,98 +8,139 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/memory.h> +#include <linux/irqflags.h> #include <linux/stop_machine.h> #include <asm/cacheflush.h> #include <asm/text-patching.h> #ifdef CONFIG_DYNAMIC_FTRACE -void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) +void ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) { mutex_lock(&text_mutex); - - /* - * The code sequences we use for ftrace can't be patched while the - * kernel is running, so we need to use stop_machine() to modify them - * for now. This doesn't play nice with text_mutex, we use this flag - * to elide the check. - */ - riscv_patch_in_stop_machine = true; } -void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) +void ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) { - riscv_patch_in_stop_machine = false; mutex_unlock(&text_mutex); } -static int ftrace_check_current_call(unsigned long hook_pos, - unsigned int *expected) +unsigned long ftrace_call_adjust(unsigned long addr) { - unsigned int replaced[2]; - unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return addr + 8 + MCOUNT_AUIPC_SIZE; - /* we expect nops at the hook position */ - if (!expected) - expected = nops; + return addr + MCOUNT_AUIPC_SIZE; +} - /* - * Read the text we want to modify; - * return must be -EFAULT on read error - */ - if (copy_from_kernel_nofault(replaced, (void *)hook_pos, - MCOUNT_INSN_SIZE)) - return -EFAULT; +unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) +{ + return fentry_ip - MCOUNT_AUIPC_SIZE; +} - /* - * Make sure it is what we expect it to be; - * return must be -EINVAL on failed comparison - */ - if (memcmp(expected, replaced, sizeof(replaced))) { - pr_err("%p: expected (%08x %08x) but got (%08x %08x)\n", - (void *)hook_pos, expected[0], expected[1], replaced[0], - replaced[1]); - return -EINVAL; +void arch_ftrace_update_code(int command) +{ + command |= FTRACE_MAY_SLEEP; + ftrace_modify_all_code(command); + flush_icache_all(); +} + +static int __ftrace_modify_call(unsigned long source, unsigned long target, bool validate) +{ + unsigned int call[2], offset; + unsigned int replaced[2]; + + offset = target - source; + call[1] = to_jalr_t0(offset); + + if (validate) { + call[0] = to_auipc_t0(offset); + /* + * Read the text we want to modify; + * return must be -EFAULT on read error + */ + if (copy_from_kernel_nofault(replaced, (void *)source, 2 * MCOUNT_INSN_SIZE)) + return -EFAULT; + + if (replaced[0] != call[0]) { + pr_err("%p: expected (%08x) but got (%08x)\n", + (void *)source, call[0], replaced[0]); + return -EINVAL; + } } + /* Replace the jalr at once. Return -EPERM on write error. */ + if (patch_insn_write((void *)(source + MCOUNT_AUIPC_SIZE), call + 1, MCOUNT_JALR_SIZE)) + return -EPERM; + return 0; } -static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, - bool enable, bool ra) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *riscv64_rec_get_ops(struct dyn_ftrace *rec) { - unsigned int call[2]; - unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; + const struct ftrace_ops *ops = NULL; - if (ra) - make_call_ra(hook_pos, target, call); - else - make_call_t0(hook_pos, target, call); + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); + } - /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ - if (patch_insn_write((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) - return -EPERM; + if (!ops) + ops = &ftrace_list_ops; - return 0; + return ops; +} + +static int ftrace_rec_set_ops(const struct dyn_ftrace *rec, const struct ftrace_ops *ops) +{ + unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8); + + return patch_text_nosync((void *)literal, &ops, sizeof(ops)); } +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} + +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, riscv64_rec_get_ops(rec)); +} +#else +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int call[2]; + unsigned long distance, orig_addr, pc = rec->ip - MCOUNT_AUIPC_SIZE; + int ret; - make_call_t0(rec->ip, addr, call); + ret = ftrace_rec_update_ops(rec); + if (ret) + return ret; - if (patch_insn_write((void *)rec->ip, call, MCOUNT_INSN_SIZE)) - return -EPERM; + orig_addr = (unsigned long)&ftrace_caller; + distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr; + if (distance > JALR_RANGE) + addr = FTRACE_ADDR; - return 0; + return __ftrace_modify_call(pc, addr, false); } -int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, - unsigned long addr) +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int nops[2] = {RISCV_INSN_NOP4, RISCV_INSN_NOP4}; + u32 nop4 = RISCV_INSN_NOP4; + int ret; + + ret = ftrace_rec_set_nop_ops(rec); + if (ret) + return ret; - if (patch_insn_write((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, &nop4, MCOUNT_NOP4_SIZE)) return -EPERM; return 0; @@ -114,75 +155,71 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, */ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { - int out; + unsigned long pc = rec->ip - MCOUNT_AUIPC_SIZE; + unsigned int nops[2], offset; + int ret; - mutex_lock(&text_mutex); - out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); - mutex_unlock(&text_mutex); + guard(mutex)(&text_mutex); - return out; -} + ret = ftrace_rec_set_nop_ops(rec); + if (ret) + return ret; -int ftrace_update_ftrace_func(ftrace_func_t func) -{ - int ret = __ftrace_modify_call((unsigned long)&ftrace_call, - (unsigned long)func, true, true); + offset = (unsigned long) &ftrace_caller - pc; + nops[0] = to_auipc_t0(offset); + nops[1] = RISCV_INSN_NOP4; + + ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE); return ret; } -struct ftrace_modify_param { - int command; - atomic_t cpu_count; -}; - -static int __ftrace_modify_code(void *data) +ftrace_func_t ftrace_call_dest = ftrace_stub; +int ftrace_update_ftrace_func(ftrace_func_t func) { - struct ftrace_modify_param *param = data; - - if (atomic_inc_return(¶m->cpu_count) == num_online_cpus()) { - ftrace_modify_all_code(param->command); - /* - * Make sure the patching store is effective *before* we - * increment the counter which releases all waiting CPUs - * by using the release variant of atomic increment. The - * release pairs with the call to local_flush_icache_all() - * on the waiting CPU. - */ - atomic_inc_return_release(¶m->cpu_count); - } else { - while (atomic_read(¶m->cpu_count) <= num_online_cpus()) - cpu_relax(); - - local_flush_icache_all(); - } + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; + WRITE_ONCE(ftrace_call_dest, func); + /* + * The data fence ensure that the update to ftrace_call_dest happens + * before the write to function_trace_op later in the generic ftrace. + * If the sequence is not enforced, then an old ftrace_call_dest may + * race loading a new function_trace_op set in ftrace_modify_all_code + */ + smp_wmb(); + /* + * Updating ftrace dpes not take stop_machine path, so irqs should not + * be disabled. + */ + WARN_ON(irqs_disabled()); + smp_call_function(ftrace_sync_ipi, NULL, 1); return 0; } -void arch_ftrace_update_code(int command) +#else /* CONFIG_DYNAMIC_FTRACE */ +unsigned long ftrace_call_adjust(unsigned long addr) { - struct ftrace_modify_param param = { command, ATOMIC_INIT(0) }; - - stop_machine(__ftrace_modify_code, ¶m, cpu_online_mask); + return addr; } -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - unsigned int call[2]; - unsigned long caller = rec->ip; + unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE; int ret; - make_call_t0(caller, old_addr, call); - ret = ftrace_check_current_call(caller, call); - + ret = ftrace_rec_update_ops(rec); if (ret) return ret; - return __ftrace_modify_call(caller, addr, true, false); + return __ftrace_modify_call(caller, FTRACE_ADDR, true); } #endif @@ -210,7 +247,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } #ifdef CONFIG_DYNAMIC_FTRACE -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { @@ -231,19 +267,5 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs)) *parent = return_hooker; } -#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ -extern void ftrace_graph_call(void); -int ftrace_enable_ftrace_graph_caller(void) -{ - return __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, true, true); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - return __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, false, true); -} -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c new file mode 100644 index 000000000000..f4755d49b89e --- /dev/null +++ b/arch/riscv/kernel/kexec_elf.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + * + * Based on kexec-tools' kexec-elf-riscv.c, heavily modified + * for kernel. + */ + +#define pr_fmt(fmt) "kexec_image: " fmt + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/libfdt.h> +#include <linux/types.h> +#include <linux/memblock.h> +#include <asm/setup.h> + +static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, + struct kexec_elf_info *elf_info, unsigned long old_pbase, + unsigned long new_pbase) +{ + int i; + int ret = 0; + size_t size; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + + kbuf.image = image; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.buf_align = phdr->p_align; + kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; + kbuf.memsz = phdr->p_memsz; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + break; + } + + return ret; +} + +/* + * Go through the available phsyical memory regions and find one that hold + * an image of the specified size. + */ +static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, + struct elfhdr *ehdr, struct kexec_elf_info *elf_info, + unsigned long *old_pbase, unsigned long *new_pbase) +{ + int i; + int ret; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + unsigned long lowest_paddr = ULONG_MAX; + unsigned long lowest_vaddr = ULONG_MAX; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + if (lowest_paddr > phdr->p_paddr) + lowest_paddr = phdr->p_paddr; + + if (lowest_vaddr > phdr->p_vaddr) + lowest_vaddr = phdr->p_vaddr; + } + + kbuf.image = image; + kbuf.buf_min = lowest_paddr; + kbuf.buf_max = ULONG_MAX; + + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 + * + */ + kbuf.buf_align = PMD_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); + kbuf.top_down = false; + ret = arch_kexec_locate_mem_hole(&kbuf); + if (!ret) { + *old_pbase = lowest_paddr; + *new_pbase = kbuf.mem; + image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; + } + return ret; +} + +static void *elf_kexec_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned long old_kernel_pbase = ULONG_MAX; + unsigned long new_kernel_pbase = 0UL; + struct elfhdr ehdr; + struct kexec_elf_info elf_info; + + ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + return ERR_PTR(ret); + + ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, + &old_kernel_pbase, &new_kernel_pbase); + if (ret) + goto out; + + /* Add the kernel binary to the image */ + ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, + old_kernel_pbase, new_kernel_pbase); + if (ret) + goto out; + + ret = load_extra_segments(image, image->start, kernel_len, + initrd, initrd_len, cmdline, cmdline_len); +out: + kexec_free_elf_info(&elf_info); + return ret ? ERR_PTR(ret) : NULL; +} + +const struct kexec_file_ops elf_kexec_ops = { + .probe = kexec_elf_probe, + .load = elf_kexec_load, +}; diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c new file mode 100644 index 000000000000..26a81774a78a --- /dev/null +++ b/arch/riscv/kernel/kexec_image.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V Kexec image loader + * + */ + +#define pr_fmt(fmt) "kexec_file(Image): " fmt + +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <linux/pe.h> +#include <linux/string.h> +#include <asm/byteorder.h> +#include <asm/image.h> + +static int image_probe(const char *kernel_buf, unsigned long kernel_len) +{ + const struct riscv_image_header *h = (const struct riscv_image_header *)kernel_buf; + + if (!h || kernel_len < sizeof(*h)) + return -EINVAL; + + /* According to Documentation/riscv/boot-image-header.rst, + * use "magic2" field to check when version >= 0.2. + */ + + if (h->version >= RISCV_HEADER_VERSION && + memcmp(&h->magic2, RISCV_IMAGE_MAGIC2, sizeof(h->magic2))) + return -EINVAL; + + return 0; +} + +static void *image_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + struct riscv_image_header *h; + u64 flags; + bool be_image, be_kernel; + struct kexec_buf kbuf; + int ret; + + /* Check Image header */ + h = (struct riscv_image_header *)kernel; + if (!h->image_size) { + ret = -EINVAL; + goto out; + } + + /* Check endianness */ + flags = le64_to_cpu(h->flags); + be_image = riscv_image_flag_field(flags, RISCV_IMAGE_FLAG_BE); + be_kernel = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + if (be_image != be_kernel) { + ret = -EINVAL; + goto out; + } + + /* Load the kernel image */ + kbuf.image = image; + kbuf.buf_min = 0; + kbuf.buf_max = ULONG_MAX; + kbuf.top_down = false; + + kbuf.buffer = kernel; + kbuf.bufsz = kernel_len; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = le64_to_cpu(h->image_size); + kbuf.buf_align = le64_to_cpu(h->text_offset); + + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add kernel image ret=%d\n", ret); + goto out; + } + + image->start = kbuf.mem; + + pr_info("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kbuf.mem, kbuf.bufsz, kbuf.memsz); + + ret = load_extra_segments(image, kbuf.mem, kbuf.memsz, + initrd, initrd_len, cmdline, cmdline_len); + +out: + return ret ? ERR_PTR(ret) : NULL; +} + +const struct kexec_file_ops image_kexec_ops = { + .probe = image_probe, + .load = image_load, +}; diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index b0bf8c1722c0..e36104af2e24 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -7,8 +7,369 @@ * Author: Liao Chang (liaochang1@huawei.com) */ #include <linux/kexec.h> +#include <linux/elf.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/libfdt.h> +#include <linux/types.h> +#include <linux/memblock.h> +#include <linux/vmalloc.h> +#include <asm/setup.h> const struct kexec_file_ops * const kexec_file_loaders[] = { &elf_kexec_ops, + &image_kexec_ops, NULL }; + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} + +#ifdef CONFIG_CRASH_DUMP +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) +{ + unsigned int *nr_ranges = arg; + + (*nr_ranges)++; + return 0; +} + +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) +{ + struct crash_mem *cmem = arg; + + cmem->ranges[cmem->nr_ranges].start = res->start; + cmem->ranges[cmem->nr_ranges].end = res->end; + cmem->nr_ranges++; + + return 0; +} + +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + struct crash_mem *cmem; + unsigned int nr_ranges; + int ret; + + nr_ranges = 1; /* For exclusion of crashkernel region */ + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); + if (ret) + goto out; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (!ret) + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + +out: + kfree(cmem); + return ret; +} + +static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) +{ + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->elf_load_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } + + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + /* Ensure it's nul terminated */ + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; +} +#endif + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RISCV_IMM_BITS 12 +#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) +#define RISCV_CONST_HIGH_PART(x) \ + (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) +#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) + +#define ENCODE_ITYPE_IMM(x) \ + (RV_X(x, 0, 12) << 20) +#define ENCODE_BTYPE_IMM(x) \ + ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ + (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) +#define ENCODE_UTYPE_IMM(x) \ + (RV_X(x, 12, 20) << 12) +#define ENCODE_JTYPE_IMM(x) \ + ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ + (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) +#define ENCODE_CBTYPE_IMM(x) \ + ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) +#define ENCODE_CJTYPE_IMM(x) \ + ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ + (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) +#define ENCODE_UJTYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ + (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) +#define ENCODE_UITYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) + +#define CLEAN_IMM(type, x) \ + ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; + Elf64_Rela *relas; + int i, r_type; + + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + unsigned long sec_base; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= pi->ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = pi->sechdrs[sym->st_shndx].sh_addr; + + val = sym->st_value; + val += sec_base; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + r_type = ELF64_R_TYPE(relas[i].r_info); + + switch (r_type) { + case R_RISCV_BRANCH: + *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | + ENCODE_BTYPE_IMM(val - addr); + break; + case R_RISCV_JAL: + *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | + ENCODE_JTYPE_IMM(val - addr); + break; + /* + * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I + * sym is expected to be next to R_RISCV_PCREL_HI20 + * in purgatory relsec. Handle it like R_RISCV_CALL + * sym, instead of searching the whole relsec. + */ + case R_RISCV_PCREL_HI20: + case R_RISCV_CALL_PLT: + case R_RISCV_CALL: + *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | + ENCODE_UJTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_BRANCH: + *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | + ENCODE_CBTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_JUMP: + *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | + ENCODE_CJTYPE_IMM(val - addr); + break; + case R_RISCV_ADD16: + *(u16 *)loc += val; + break; + case R_RISCV_SUB16: + *(u16 *)loc -= val; + break; + case R_RISCV_ADD32: + *(u32 *)loc += val; + break; + case R_RISCV_SUB32: + *(u32 *)loc -= val; + break; + /* It has been applied by R_RISCV_PCREL_HI20 sym */ + case R_RISCV_PCREL_LO12_I: + case R_RISCV_ALIGN: + case R_RISCV_RELAX: + break; + case R_RISCV_64: + *(u64 *)loc = val; + break; + default: + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } + } + return 0; +} + + +int load_extra_segments(struct kimage *image, unsigned long kernel_start, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + void *fdt; + unsigned long initrd_pbase = 0UL; + struct kexec_buf kbuf; + char *modified_cmdline = NULL; + + kbuf.image = image; + kbuf.buf_min = kernel_start + kernel_len; + kbuf.buf_max = ULONG_MAX; + +#ifdef CONFIG_CRASH_DUMP + /* Add elfcorehdr */ + if (image->type == KEXEC_TYPE_CRASH) { + void *headers; + unsigned long headers_sz; + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + goto out; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret) { + vfree(headers); + goto out; + } + image->elf_headers = headers; + image->elf_load_addr = kbuf.mem; + image->elf_headers_sz = headers_sz; + + kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } +#endif + +#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY + /* Add purgatory to the image */ + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_load_purgatory(image, &kbuf); + if (ret) { + pr_err("Error loading purgatory ret=%d\n", ret); + goto out; + } + kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); + + ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", + &kernel_start, + sizeof(kernel_start), 0); + if (ret) + pr_err("Error update purgatory ret=%d\n", ret); +#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ + + /* Add the initrd to the image */ + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_pbase = kbuf.mem; + kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); + } + + /* Add the DTB to the image */ + fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, + initrd_len, cmdline, 0); + if (!fdt) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + fdt_pack(fdt); + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); + kbuf.buf_align = PAGE_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add DTB kbuf ret=%d\n", ret); + goto out_free_fdt; + } + /* Cache the fdt buffer address for memory cleanup */ + image->arch.fdt = fdt; + kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); + goto out; + +out_free_fdt: + kvfree(fdt); +out: + kfree(modified_cmdline); + return ret; +} diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 745dd4c4a69c..48f6c4f7dca0 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -13,7 +13,6 @@ .text -#define FENTRY_RA_OFFSET 8 #define ABI_SIZE_ON_STACK 80 #define ABI_A0 0 #define ABI_A1 8 @@ -56,16 +55,13 @@ addi sp, sp, ABI_SIZE_ON_STACK .endm -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS - /** * SAVE_ABI_REGS - save regs against the ftrace_regs struct * * After the stack is established, * * 0(sp) stores the PC of the traced function which can be accessed -* by &(fregs)->epc in tracing function. Note that the real -* function entry address should be computed with -FENTRY_RA_OFFSET. +* by &(fregs)->epc in tracing function. * * 8(sp) stores the function return address (i.e. parent IP) that * can be accessed by &(fregs)->ra in tracing function. @@ -86,17 +82,20 @@ * +++++++++ **/ .macro SAVE_ABI_REGS - mv t4, sp // Save original SP in T4 addi sp, sp, -FREGS_SIZE_ON_STACK - REG_S t0, FREGS_EPC(sp) REG_S x1, FREGS_RA(sp) - REG_S t4, FREGS_SP(sp) // Put original SP on stack #ifdef HAVE_FUNCTION_GRAPH_FP_TEST REG_S x8, FREGS_S0(sp) #endif REG_S x6, FREGS_T1(sp) - +#ifdef CONFIG_CC_IS_CLANG + REG_S x7, FREGS_T2(sp) + REG_S x28, FREGS_T3(sp) + REG_S x29, FREGS_T4(sp) + REG_S x30, FREGS_T5(sp) + REG_S x31, FREGS_T6(sp) +#endif // save the arguments REG_S x10, FREGS_A0(sp) REG_S x11, FREGS_A1(sp) @@ -106,16 +105,25 @@ REG_S x15, FREGS_A5(sp) REG_S x16, FREGS_A6(sp) REG_S x17, FREGS_A7(sp) + mv a0, sp + addi a0, a0, FREGS_SIZE_ON_STACK + REG_S a0, FREGS_SP(sp) // Put original SP on stack .endm - .macro RESTORE_ABI_REGS, all=0 + .macro RESTORE_ABI_REGS REG_L t0, FREGS_EPC(sp) REG_L x1, FREGS_RA(sp) #ifdef HAVE_FUNCTION_GRAPH_FP_TEST REG_L x8, FREGS_S0(sp) #endif REG_L x6, FREGS_T1(sp) - +#ifdef CONFIG_CC_IS_CLANG + REG_L x7, FREGS_T2(sp) + REG_L x28, FREGS_T3(sp) + REG_L x29, FREGS_T4(sp) + REG_L x30, FREGS_T5(sp) + REG_L x31, FREGS_T6(sp) +#endif // restore the arguments REG_L x10, FREGS_A0(sp) REG_L x11, FREGS_A1(sp) @@ -130,60 +138,71 @@ .endm .macro PREPARE_ARGS - addi a0, t0, -FENTRY_RA_OFFSET + addi a0, t0, -MCOUNT_JALR_SIZE // ip (callsite's jalr insn) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + mv a1, ra // parent_ip + REG_L a2, -16(t0) // op + REG_L ra, FTRACE_OPS_FUNC(a2) // op->func +#else la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp + REG_L a2, 0(a1) // op + mv a1, ra // parent_ip +#endif + mv a3, sp // regs .endm -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ - -#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS SYM_FUNC_START(ftrace_caller) - SAVE_ABI - - addi a0, t0, -FENTRY_RA_OFFSET - la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* + * When CALL_OPS is enabled (2 or 4) nops [8B] are placed before the + * function entry, these are later overwritten with the pointer to the + * associated struct ftrace_ops. + * + * -8: &ftrace_ops of the associated tracer function. + *<ftrace enable>: + * 0: auipc t0/ra, 0x? + * 4: jalr t0/ra, ?(t0/ra) + * + * -8: &ftrace_nop_ops + *<ftrace disable>: + * 0: nop + * 4: nop + * + * t0 is set to ip+8 after the jalr is executed at the callsite, + * so we find the associated op at t0-16. + */ + REG_L t1, -16(t0) // op Should be SZ_REG instead of 16 -SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) - call ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - addi a0, sp, ABI_RA - REG_L a1, ABI_T0(sp) - addi a1, a1, -FENTRY_RA_OFFSET -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST - mv a2, s0 +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* + * If the op has a direct call, handle it immediately without + * saving/restoring registers. + */ + REG_L t1, FTRACE_OPS_DIRECT_CALL(t1) + bnez t1, ftrace_caller_direct #endif -SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) - call ftrace_stub #endif - RESTORE_ABI - jr t0 -SYM_FUNC_END(ftrace_caller) - -#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ -SYM_FUNC_START(ftrace_caller) - mv t1, zero SAVE_ABI_REGS PREPARE_ARGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + jalr ra +#else SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) - call ftrace_stub - + REG_L ra, ftrace_call_dest + jalr ra, 0(ra) +#endif RESTORE_ABI_REGS - bnez t1, .Ldirect +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnez t1, ftrace_caller_direct +#endif jr t0 -.Ldirect: +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL) jr t1 +#endif SYM_FUNC_END(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS SYM_CODE_START(ftrace_stub_direct_tramp) jr t0 diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index 91d0b355ceef..75551ac6504c 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleloader.h> +#include <linux/sort.h> unsigned long module_emit_got_entry(struct module *mod, unsigned long val) { @@ -55,44 +56,70 @@ unsigned long module_emit_plt_entry(struct module *mod, unsigned long val) return (unsigned long)&plt[i]; } -static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y) +#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b)) + +static int cmp_rela(const void *a, const void *b) { - return x->r_info == y->r_info && x->r_addend == y->r_addend; + const Elf_Rela *x = a, *y = b; + int i; + + /* sort by type, symbol index and addend */ + i = cmp_3way(x->r_info, y->r_info); + if (i == 0) + i = cmp_3way(x->r_addend, y->r_addend); + return i; } static bool duplicate_rela(const Elf_Rela *rela, int idx) { - int i; - for (i = 0; i < idx; i++) { - if (is_rela_equal(&rela[i], &rela[idx])) - return true; - } - return false; + /* + * Entries are sorted by type, symbol index and addend. That means + * that, if a duplicate entry exists, it must be in the preceding slot. + */ + return idx > 0 && cmp_rela(rela + idx, rela + idx - 1) == 0; } -static void count_max_entries(Elf_Rela *relas, int num, +static void count_max_entries(const Elf_Rela *relas, size_t num, unsigned int *plts, unsigned int *gots) { - for (int i = 0; i < num; i++) { + for (size_t i = 0; i < num; i++) { + if (duplicate_rela(relas, i)) + continue; + switch (ELF_R_TYPE(relas[i].r_info)) { case R_RISCV_CALL_PLT: case R_RISCV_PLT32: - if (!duplicate_rela(relas, i)) - (*plts)++; + (*plts)++; break; case R_RISCV_GOT_HI20: - if (!duplicate_rela(relas, i)) - (*gots)++; + (*gots)++; break; + default: + unreachable(); } } } +static bool rela_needs_plt_got_entry(const Elf_Rela *rela) +{ + switch (ELF_R_TYPE(rela->r_info)) { + case R_RISCV_CALL_PLT: + case R_RISCV_GOT_HI20: + case R_RISCV_PLT32: + return true; + default: + return false; + } +} + int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { + size_t num_scratch_relas = 0; unsigned int num_plts = 0; unsigned int num_gots = 0; + Elf_Rela *scratch = NULL; + size_t scratch_size = 0; int i; /* @@ -122,9 +149,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* Calculate the maxinum number of entries */ for (i = 0; i < ehdr->e_shnum; i++) { + size_t num_relas = sechdrs[i].sh_size / sizeof(Elf_Rela); Elf_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset; - int num_rela = sechdrs[i].sh_size / sizeof(Elf_Rela); Elf_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info; + size_t scratch_size_needed; if (sechdrs[i].sh_type != SHT_RELA) continue; @@ -133,7 +161,28 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (!(dst_sec->sh_flags & SHF_EXECINSTR)) continue; - count_max_entries(relas, num_rela, &num_plts, &num_gots); + /* + * apply_relocate_add() relies on HI20 and LO12 relocation pairs being + * close together, so sort a copy of the section to avoid interfering. + */ + scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch); + if (scratch_size_needed > scratch_size) { + scratch_size = scratch_size_needed; + scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); + if (!scratch) + return -ENOMEM; + } + + for (size_t j = 0; j < num_relas; j++) + if (rela_needs_plt_got_entry(&relas[j])) + scratch[num_scratch_relas++] = relas[j]; + } + + if (scratch) { + /* sort the accumulated PLT/GOT relocations so duplicates are adjacent */ + sort(scratch, num_scratch_relas, sizeof(*scratch), cmp_rela, NULL); + count_max_entries(scratch, num_scratch_relas, &num_plts, &num_gots); + kvfree(scratch); } mod->arch.plt.shdr->sh_type = SHT_NOBITS; diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index bbf7ec6a75c0..a0a40889d79a 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -60,7 +60,7 @@ int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) if (!unaligned_ctl_available()) return -EINVAL; - return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr); + return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr); } void __show_regs(struct pt_regs *regs) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 1989b8cade1b..53836a9235e3 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -299,6 +299,76 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, return 0; } +static bool sbi_fwft_supported; + +struct fwft_set_req { + u32 feature; + unsigned long value; + unsigned long flags; + atomic_t error; +}; + +static void cpu_sbi_fwft_set(void *arg) +{ + struct fwft_set_req *req = arg; + int ret; + + ret = sbi_fwft_set(req->feature, req->value, req->flags); + if (ret) + atomic_set(&req->error, ret); +} + +/** + * sbi_fwft_set() - Set a feature on the local hart + * @feature: The feature ID to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags) +{ + struct sbiret ret; + + if (!sbi_fwft_supported) + return -EOPNOTSUPP; + + ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET, + feature, value, flags, 0, 0, 0); + + return sbi_err_map_linux_errno(ret.error); +} + +/** + * sbi_fwft_set_cpumask() - Set a feature for the specified cpumask + * @mask: CPU mask of cpus that need the feature to be set + * @feature: The feature ID to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature, + unsigned long value, unsigned long flags) +{ + struct fwft_set_req req = { + .feature = feature, + .value = value, + .flags = flags, + .error = ATOMIC_INIT(0), + }; + + if (!sbi_fwft_supported) + return -EOPNOTSUPP; + + if (feature & SBI_FWFT_GLOBAL_FEATURE_BIT) + return -EINVAL; + + on_each_cpu_mask(mask, cpu_sbi_fwft_set, &req, 1); + + return atomic_read(&req.error); +} + /** * sbi_set_timer() - Program the timer for next timer event. * @stime_value: The value after which next timer event should fire. @@ -609,7 +679,7 @@ void __init sbi_init(void) } else { __sbi_rfence = __sbi_rfence_v01; } - if ((sbi_spec_version >= sbi_mk_version(0, 3)) && + if (sbi_spec_version >= sbi_mk_version(0, 3) && sbi_probe_extension(SBI_EXT_SRST)) { pr_info("SBI SRST extension detected\n"); pm_power_off = sbi_srst_power_off; @@ -617,11 +687,16 @@ void __init sbi_init(void) sbi_srst_reboot_nb.priority = 192; register_restart_handler(&sbi_srst_reboot_nb); } - if ((sbi_spec_version >= sbi_mk_version(2, 0)) && - (sbi_probe_extension(SBI_EXT_DBCN) > 0)) { + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_DBCN) > 0) { pr_info("SBI DBCN extension detected\n"); sbi_debug_console_available = true; } + if (sbi_spec_version >= sbi_mk_version(3, 0) && + sbi_probe_extension(SBI_EXT_FWFT)) { + pr_info("SBI FWFT extension detected\n"); + sbi_fwft_supported = true; + } } else { __sbi_set_timer = __sbi_set_timer_v01; __sbi_send_ipi = __sbi_send_ipi_v01; diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f7c9a1caa83e..14888e5ea19a 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -50,6 +50,7 @@ atomic_t hart_lottery __section(".sdata") #endif ; unsigned long boot_cpu_hartid; +EXPORT_SYMBOL_GPL(boot_cpu_hartid); /* * Place kernel memory regions on the resource tree so that diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 249aec8594a9..0b170e18a2be 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -15,6 +15,7 @@ #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/vector.h> +#include <asm/vendor_extensions/sifive_hwprobe.h> #include <asm/vendor_extensions/thead_hwprobe.h> #include <vdso/vsyscall.h> @@ -96,6 +97,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * presence in the hart_isa bitmap, are made. */ EXT_KEY(ZAAMO); + EXT_KEY(ZABHA); EXT_KEY(ZACAS); EXT_KEY(ZALRSC); EXT_KEY(ZAWRS); @@ -300,6 +302,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = riscv_timebase; break; + case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0: + hwprobe_isa_vendor_ext_sifive_0(pair, cpus); + break; + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: hwprobe_isa_vendor_ext_thead_0(pair, cpus); break; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 9c83848797a7..80230de167de 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/irqflags.h> #include <linux/randomize_kstack.h> #include <linux/sched.h> #include <linux/sched/debug.h> @@ -151,7 +152,9 @@ asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ { \ if (user_mode(regs)) { \ irqentry_enter_from_user_mode(regs); \ + local_irq_enable(); \ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ + local_irq_disable(); \ irqentry_exit_to_user_mode(regs); \ } else { \ irqentry_state_t state = irqentry_nmi_enter(regs); \ @@ -173,17 +176,14 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); - local_irq_enable(); handled = riscv_v_first_use_handler(regs); - - local_irq_disable(); - if (!handled) do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc, "Oops - illegal instruction"); + local_irq_disable(); irqentry_exit_to_user_mode(regs); } else { irqentry_state_t state = irqentry_nmi_enter(regs); @@ -308,9 +308,11 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) { if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); + local_irq_enable(); handle_break(regs); + local_irq_disable(); irqentry_exit_to_user_mode(regs); } else { irqentry_state_t state = irqentry_nmi_enter(regs); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 77c788660223..f760e4fcc052 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -16,6 +16,7 @@ #include <asm/entry-common.h> #include <asm/hwprobe.h> #include <asm/cpufeature.h> +#include <asm/sbi.h> #include <asm/vector.h> #define INSN_MATCH_LB 0x3 @@ -368,9 +369,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; -#endif if (!unaligned_enabled) return -1; @@ -462,7 +461,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) } if (!fp) - SET_RD(insn, regs, val.data_ulong << shift >> shift); + SET_RD(insn, regs, (long)(val.data_ulong << shift) >> shift); else if (len == 8) set_f64_rd(insn, regs, val.data_u64); else @@ -626,6 +625,10 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void) { int cpu; + /* + * While being documented as very slow, schedule_on_each_cpu() is used since + * kernel_vector_begin() expects irqs to be enabled or it will panic() + */ schedule_on_each_cpu(check_vector_unaligned_access_emulated); for_each_online_cpu(cpu) @@ -642,11 +645,23 @@ bool __init check_vector_unaligned_access_emulated_all_cpus(void) } #endif +static bool all_cpus_unaligned_scalar_access_emulated(void) +{ + int cpu; + + for_each_online_cpu(cpu) + if (per_cpu(misaligned_access_speed, cpu) != + RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) + return false; + + return true; +} + #ifdef CONFIG_RISCV_SCALAR_MISALIGNED static bool unaligned_ctl __read_mostly; -void check_unaligned_access_emulated(struct work_struct *work __always_unused) +static void check_unaligned_access_emulated(void *arg __always_unused) { int cpu = smp_processor_id(); long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); @@ -657,6 +672,13 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused) __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); +} + +static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) +{ + long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); + + check_unaligned_access_emulated(NULL); /* * If unaligned_ctl is already set, this means that we detected that all @@ -665,26 +687,23 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused) */ if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) { pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); - while (true) - cpu_relax(); + return -EINVAL; } + + return 0; } bool __init check_unaligned_access_emulated_all_cpus(void) { - int cpu; - /* * We can only support PR_UNALIGN controls if all CPUs have misaligned * accesses emulated since tasks requesting such control can run on any * CPU. */ - schedule_on_each_cpu(check_unaligned_access_emulated); + on_each_cpu(check_unaligned_access_emulated, NULL, 1); - for_each_online_cpu(cpu) - if (per_cpu(misaligned_access_speed, cpu) - != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) - return false; + if (!all_cpus_unaligned_scalar_access_emulated()) + return false; unaligned_ctl = true; return true; @@ -699,4 +718,73 @@ bool __init check_unaligned_access_emulated_all_cpus(void) { return false; } +static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) +{ + return 0; +} +#endif + +static bool misaligned_traps_delegated; + +#ifdef CONFIG_RISCV_SBI + +static int cpu_online_sbi_unaligned_setup(unsigned int cpu) +{ + if (sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0) && + misaligned_traps_delegated) { + pr_crit("Misaligned trap delegation non homogeneous (expected delegated)"); + return -EINVAL; + } + + return 0; +} + +void __init unaligned_access_init(void) +{ + int ret; + + ret = sbi_fwft_set_online_cpus(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0); + if (ret) + return; + + misaligned_traps_delegated = true; + pr_info("SBI misaligned access exception delegation ok\n"); + /* + * Note that we don't have to take any specific action here, if + * the delegation is successful, then + * check_unaligned_access_emulated() will verify that indeed the + * platform traps on misaligned accesses. + */ +} +#else +void __init unaligned_access_init(void) {} + +static int cpu_online_sbi_unaligned_setup(unsigned int cpu __always_unused) +{ + return 0; +} + #endif + +int cpu_online_unaligned_access_init(unsigned int cpu) +{ + int ret; + + ret = cpu_online_sbi_unaligned_setup(cpu); + if (ret) + return ret; + + return cpu_online_check_unaligned_access_emulated(cpu); +} + +bool misaligned_traps_can_delegate(void) +{ + /* + * Either we successfully requested misaligned traps delegation for all + * CPUs, or the SBI does not implement the FWFT extension but delegated + * the exception by default. + */ + return misaligned_traps_delegated || + all_cpus_unaligned_scalar_access_emulated(); +} +EXPORT_SYMBOL_GPL(misaligned_traps_can_delegate); diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index b8ba13819d05..ae2068425fbc 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -236,6 +236,11 @@ arch_initcall_sync(lock_and_set_unaligned_access_static_branch); static int riscv_online_cpu(unsigned int cpu) { + int ret = cpu_online_unaligned_access_init(cpu); + + if (ret) + return ret; + /* We are already set since the last check */ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { goto exit; @@ -248,7 +253,6 @@ static int riscv_online_cpu(unsigned int cpu) { static struct page *buf; - check_unaligned_access_emulated(NULL); buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); if (!buf) { pr_warn("Allocation failure, not measuring misaligned performance\n"); @@ -439,6 +443,8 @@ static int __init check_unaligned_access_all_cpus(void) { int cpu; + unaligned_access_init(); + if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n", speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param); diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index cc2895d1fbc2..3a8e038b10a2 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -136,7 +136,7 @@ static int __setup_additional_pages(struct mm_struct *mm, ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), + (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_SEALED_SYSMAP), vdso_info->cm); if (IS_ERR(ret)) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index ad73607abc28..9ebb5e590f93 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -13,9 +13,17 @@ vdso-syms += flush_icache vdso-syms += hwprobe vdso-syms += sys_hwprobe +ifdef CONFIG_VDSO_GETRANDOM +vdso-syms += getrandom +endif + # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o +ifdef CONFIG_VDSO_GETRANDOM +obj-vdso += vgetrandom-chacha.o +endif + ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING ccflags-y += -fno-builtin @@ -24,6 +32,10 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif +ifneq ($(c-getrandom-y),) + CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y) +endif + CFLAGS_hwprobe.o += -fPIC # Build rules @@ -38,6 +50,7 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) +CFLAGS_REMOVE_getrandom.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Force dependency @@ -47,7 +60,7 @@ $(obj)/vdso.o: $(obj)/vdso.so $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold_and_check) LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \ - --build-id=sha1 --hash-style=both --eh-frame-hdr + --build-id=sha1 --eh-frame-hdr # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S diff --git a/arch/riscv/kernel/vdso/getrandom.c b/arch/riscv/kernel/vdso/getrandom.c new file mode 100644 index 000000000000..f21922e8cebd --- /dev/null +++ b/arch/riscv/kernel/vdso/getrandom.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved. + */ +#include <linux/types.h> + +ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 8e86965a8aae..c29ef12a63bb 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -30,7 +30,7 @@ SECTIONS *(.data .data.* .gnu.linkonce.d.*) *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) - } + } :text .note : { *(.note.*) } :text :note @@ -80,6 +80,9 @@ VERSION #ifndef COMPAT_VDSO __vdso_riscv_hwprobe; #endif +#if defined(CONFIG_VDSO_GETRANDOM) && !defined(COMPAT_VDSO) + __vdso_getrandom; +#endif local: *; }; } diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..5f0dad8f2373 --- /dev/null +++ b/arch/riscv/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved. + * + * Based on arch/loongarch/vdso/vgetrandom-chacha.S. + */ + +#include <asm/asm.h> +#include <linux/linkage.h> + +.text + +.macro ROTRI rd rs imm + slliw t0, \rs, 32 - \imm + srliw \rd, \rs, \imm + or \rd, \rd, t0 +.endm + +.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3 + \op \d0, \d0, \s0 + \op \d1, \d1, \s1 + \op \d2, \d2, \s2 + \op \d3, \d3, \s3 +.endm + +/* + * a0: output bytes + * a1: 32-byte key input + * a2: 8-byte counter input/output + * a3: number of 64-byte blocks to write to output + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + +#define output a0 +#define key a1 +#define counter a2 +#define nblocks a3 +#define i a4 +#define state0 s0 +#define state1 s1 +#define state2 s2 +#define state3 s3 +#define state4 s4 +#define state5 s5 +#define state6 s6 +#define state7 s7 +#define state8 s8 +#define state9 s9 +#define state10 s10 +#define state11 s11 +#define state12 a5 +#define state13 a6 +#define state14 a7 +#define state15 t1 +#define cnt t2 +#define copy0 t3 +#define copy1 t4 +#define copy2 t5 +#define copy3 t6 + +/* Packs to be used with OP_4REG */ +#define line0 state0, state1, state2, state3 +#define line1 state4, state5, state6, state7 +#define line2 state8, state9, state10, state11 +#define line3 state12, state13, state14, state15 + +#define line1_perm state5, state6, state7, state4 +#define line2_perm state10, state11, state8, state9 +#define line3_perm state15, state12, state13, state14 + +#define copy copy0, copy1, copy2, copy3 + +#define _16 16, 16, 16, 16 +#define _20 20, 20, 20, 20 +#define _24 24, 24, 24, 24 +#define _25 25, 25, 25, 25 + + /* + * The ABI requires s0-s9 saved. + * This does not violate the stack-less requirement: no sensitive data + * is spilled onto the stack. + */ + addi sp, sp, -12*SZREG + REG_S s0, (sp) + REG_S s1, SZREG(sp) + REG_S s2, 2*SZREG(sp) + REG_S s3, 3*SZREG(sp) + REG_S s4, 4*SZREG(sp) + REG_S s5, 5*SZREG(sp) + REG_S s6, 6*SZREG(sp) + REG_S s7, 7*SZREG(sp) + REG_S s8, 8*SZREG(sp) + REG_S s9, 9*SZREG(sp) + REG_S s10, 10*SZREG(sp) + REG_S s11, 11*SZREG(sp) + + ld cnt, (counter) + + li copy0, 0x61707865 + li copy1, 0x3320646e + li copy2, 0x79622d32 + li copy3, 0x6b206574 + +.Lblock: + /* state[0,1,2,3] = "expand 32-byte k" */ + mv state0, copy0 + mv state1, copy1 + mv state2, copy2 + mv state3, copy3 + + /* state[4,5,..,11] = key */ + lw state4, (key) + lw state5, 4(key) + lw state6, 8(key) + lw state7, 12(key) + lw state8, 16(key) + lw state9, 20(key) + lw state10, 24(key) + lw state11, 28(key) + + /* state[12,13] = counter */ + mv state12, cnt + srli state13, cnt, 32 + + /* state[14,15] = 0 */ + mv state14, zero + mv state15, zero + + li i, 10 +.Lpermute: + /* odd round */ + OP_4REG addw line0, line1 + OP_4REG xor line3, line0 + OP_4REG ROTRI line3, _16 + + OP_4REG addw line2, line3 + OP_4REG xor line1, line2 + OP_4REG ROTRI line1, _20 + + OP_4REG addw line0, line1 + OP_4REG xor line3, line0 + OP_4REG ROTRI line3, _24 + + OP_4REG addw line2, line3 + OP_4REG xor line1, line2 + OP_4REG ROTRI line1, _25 + + /* even round */ + OP_4REG addw line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG ROTRI line3_perm, _16 + + OP_4REG addw line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG ROTRI line1_perm, _20 + + OP_4REG addw line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG ROTRI line3_perm, _24 + + OP_4REG addw line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG ROTRI line1_perm, _25 + + addi i, i, -1 + bnez i, .Lpermute + + /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ + OP_4REG addw line0, copy + sw state0, (output) + sw state1, 4(output) + sw state2, 8(output) + sw state3, 12(output) + + /* from now on state[0,1,2,3] are scratch registers */ + + /* state[0,1,2,3] = lo(key) */ + lw state0, (key) + lw state1, 4(key) + lw state2, 8(key) + lw state3, 12(key) + + /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */ + OP_4REG addw line1, line0 + sw state4, 16(output) + sw state5, 20(output) + sw state6, 24(output) + sw state7, 28(output) + + /* state[0,1,2,3] = hi(key) */ + lw state0, 16(key) + lw state1, 20(key) + lw state2, 24(key) + lw state3, 28(key) + + /* output[8,9,10,11] = tmp[0,1,2,3] + state[8,9,10,11] */ + OP_4REG addw line2, line0 + sw state8, 32(output) + sw state9, 36(output) + sw state10, 40(output) + sw state11, 44(output) + + /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */ + addw state12, state12, cnt + srli state0, cnt, 32 + addw state13, state13, state0 + sw state12, 48(output) + sw state13, 52(output) + sw state14, 56(output) + sw state15, 60(output) + + /* ++counter */ + addi cnt, cnt, 1 + + /* output += 64 */ + addi output, output, 64 + /* --nblocks */ + addi nblocks, nblocks, -1 + bnez nblocks, .Lblock + + /* counter = [cnt_lo, cnt_hi] */ + sd cnt, (counter) + + /* Zero out the potentially sensitive regs, in case nothing uses these + * again. As at now copy[0,1,2,3] just contains "expand 32-byte k" and + * state[0,...,11] are s0-s11 those we'll restore in the epilogue, we + * only need to zero state[12,...,15]. + */ + mv state12, zero + mv state13, zero + mv state14, zero + mv state15, zero + + REG_L s0, (sp) + REG_L s1, SZREG(sp) + REG_L s2, 2*SZREG(sp) + REG_L s3, 3*SZREG(sp) + REG_L s4, 4*SZREG(sp) + REG_L s5, 5*SZREG(sp) + REG_L s6, 6*SZREG(sp) + REG_L s7, 7*SZREG(sp) + REG_L s8, 8*SZREG(sp) + REG_L s9, 9*SZREG(sp) + REG_L s10, 10*SZREG(sp) + REG_L s11, 11*SZREG(sp) + addi sp, sp, 12*SZREG + + ret +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index 9feb7f67a0a3..92d8ff81f42c 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -6,6 +6,7 @@ #include <asm/vendorid_list.h> #include <asm/vendor_extensions.h> #include <asm/vendor_extensions/andes.h> +#include <asm/vendor_extensions/sifive.h> #include <asm/vendor_extensions/thead.h> #include <linux/array_size.h> @@ -15,6 +16,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES &riscv_isa_vendor_ext_list_andes, #endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE + &riscv_isa_vendor_ext_list_sifive, +#endif #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD &riscv_isa_vendor_ext_list_thead, #endif @@ -45,6 +49,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE + case SIFIVE_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_sifive.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap; + break; + #endif #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD case THEAD_VENDOR_ID: bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile index 866414c81a9f..a4eca96d1c8a 100644 --- a/arch/riscv/kernel/vendor_extensions/Makefile +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive_hwprobe.o obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c new file mode 100644 index 000000000000..8fcf67e8c07f --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/sifive.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/sifive.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* All SiFive vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { + __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF), + __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ), + __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD), + __RISCV_ISA_EXT_DATA(xsfvqmaccqoq, RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_sifive), + .ext_data = riscv_isa_vendor_ext_sifive, +}; diff --git a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c new file mode 100644 index 000000000000..1f77f6309763 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/vendor_extensions/sifive.h> +#include <asm/vendor_extensions/sifive_hwprobe.h> +#include <asm/vendor_extensions/vendor_hwprobe.h> + +#include <linux/cpumask.h> +#include <linux/types.h> + +#include <uapi/asm/hwprobe.h> +#include <uapi/asm/vendor/sifive.h> + +void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XSFVQMACCDOD); + VENDOR_EXT_KEY(XSFVQMACCQOQ); + VENDOR_EXT_KEY(XSFVFNRCLIPXFQF); + VENDOR_EXT_KEY(XSFVFWMACCQQQ); + }); +} diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 19afd1f23537..dad318185660 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -30,28 +30,6 @@ unsigned int kvm_riscv_aia_nr_hgei; unsigned int kvm_riscv_aia_max_ids; DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available); -static int aia_find_hgei(struct kvm_vcpu *owner) -{ - int i, hgei; - unsigned long flags; - struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei); - - raw_spin_lock_irqsave(&hgctrl->lock, flags); - - hgei = -1; - for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) { - if (hgctrl->owners[i] == owner) { - hgei = i; - break; - } - } - - raw_spin_unlock_irqrestore(&hgctrl->lock, flags); - - put_cpu_ptr(&aia_hgei); - return hgei; -} - static inline unsigned long aia_hvictl_value(bool ext_irq_pending) { unsigned long hvictl; @@ -95,7 +73,6 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) { - int hgei; unsigned long seip; if (!kvm_riscv_aia_available()) @@ -114,11 +91,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip) return false; - hgei = aia_find_hgei(vcpu); - if (hgei > 0) - return !!(ncsr_read(CSR_HGEIP) & BIT(hgei)); - - return false; + return kvm_riscv_vcpu_aia_imsic_has_interrupt(vcpu); } void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) @@ -164,6 +137,9 @@ void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_HVIPRIO2H, csr->hviprio2h); #endif } + + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_load(vcpu, cpu); } void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) @@ -174,6 +150,9 @@ void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) if (!kvm_riscv_aia_available()) return; + if (kvm_riscv_aia_initialized(vcpu->kvm)) + kvm_riscv_vcpu_aia_imsic_put(vcpu); + if (kvm_riscv_nacl_available()) { nsh = nacl_shmem(); csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT); @@ -472,22 +451,6 @@ void kvm_riscv_aia_free_hgei(int cpu, int hgei) raw_spin_unlock_irqrestore(&hgctrl->lock, flags); } -void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable) -{ - int hgei; - - if (!kvm_riscv_aia_available()) - return; - - hgei = aia_find_hgei(owner); - if (hgei > 0) { - if (enable) - csr_set(CSR_HGEIE, BIT(hgei)); - else - csr_clear(CSR_HGEIE, BIT(hgei)); - } -} - static irqreturn_t hgei_interrupt(int irq, void *dev_id) { int i; diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 29ef9c2133a9..2ff865943ebb 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -676,6 +676,48 @@ static void imsic_swfile_update(struct kvm_vcpu *vcpu, imsic_swfile_extirq_update(vcpu); } +bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + bool ret = false; + + /* + * The IMSIC SW-file directly injects interrupt via hvip so + * only check for interrupt when IMSIC VS-file is being used. + */ + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); + + return ret; +} + +void kvm_riscv_vcpu_aia_imsic_load(struct kvm_vcpu *vcpu, int cpu) +{ + /* + * No need to explicitly clear HGEIE CSR bits because the + * hgei interrupt handler (aka hgei_interrupt()) will always + * clear it for us. + */ +} + +void kvm_riscv_vcpu_aia_imsic_put(struct kvm_vcpu *vcpu) +{ + struct imsic *imsic = vcpu->arch.aia_context.imsic_state; + unsigned long flags; + + if (!kvm_vcpu_is_blocking(vcpu)) + return; + + read_lock_irqsave(&imsic->vsfile_lock, flags); + if (imsic->vsfile_cpu > -1) + csr_set(CSR_HGEIE, BIT(imsic->vsfile_hgei)); + read_unlock_irqrestore(&imsic->vsfile_lock, flags); +} + void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -781,6 +823,9 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) * producers to the new IMSIC VS-file. */ + /* Ensure HGEIE CSR bit is zero before using the new IMSIC VS-file */ + csr_clear(CSR_HGEIE, BIT(new_vsfile_hgei)); + /* Zero-out new IMSIC VS-file */ imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e0a01af426ff..0462863206ca 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -207,16 +207,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_riscv_vcpu_timer_pending(vcpu); } -void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, true); -} - -void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) -{ - kvm_riscv_aia_wakeon_hgei(vcpu, false); -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 5fbf3f94f1e8..b17fad091bab 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -103,7 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask); else kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask, @@ -111,7 +111,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, hbase, hmask, cp->a4); else @@ -127,9 +127,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: /* * Until nested virtualization is implemented, the - * SBI HFENCE calls should be treated as NOPs + * SBI HFENCE calls should return not supported + * hence fallthrough. */ - break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index ff672fa71fcc..85a7262115e1 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -345,8 +345,24 @@ void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) /* * The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync() * upon every VM exit so no need to save here. + * + * If VS-timer expires when no VCPU running on a host CPU then + * WFI executed by such host CPU will be effective NOP resulting + * in no power savings. This is because as-per RISC-V Privileged + * specificaiton: "WFI is also required to resume execution for + * locally enabled interrupts pending at any privilege level, + * regardless of the global interrupt enable at each privilege + * level." + * + * To address the above issue, vstimecmp CSR must be set to -1UL + * over here when VCPU is scheduled-out or exits to user space. */ + csr_write(CSR_VSTIMECMP, -1UL); +#if defined(CONFIG_32BIT) + csr_write(CSR_VSTIMECMPH, -1UL); +#endif + /* timer should be enabled for the remaining operations */ if (unlikely(!t->init_done)) return; diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c index be38a93cedae..7bbdfc6d4552 100644 --- a/arch/riscv/lib/riscv_v_helpers.c +++ b/arch/riscv/lib/riscv_v_helpers.c @@ -16,8 +16,11 @@ #ifdef CONFIG_MMU size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD; int __asm_vector_usercopy(void *dst, void *src, size_t n); +int __asm_vector_usercopy_sum_enabled(void *dst, void *src, size_t n); int fallback_scalar_usercopy(void *dst, void *src, size_t n); -asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) +int fallback_scalar_usercopy_sum_enabled(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n, + bool enable_sum) { size_t remain, copied; @@ -26,7 +29,8 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) goto fallback; kernel_vector_begin(); - remain = __asm_vector_usercopy(dst, src, n); + remain = enable_sum ? __asm_vector_usercopy(dst, src, n) : + __asm_vector_usercopy_sum_enabled(dst, src, n); kernel_vector_end(); if (remain) { @@ -40,6 +44,7 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) return remain; fallback: - return fallback_scalar_usercopy(dst, src, n); + return enable_sum ? fallback_scalar_usercopy(dst, src, n) : + fallback_scalar_usercopy_sum_enabled(dst, src, n); } #endif diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 6a9f116bb545..4efea1b3326c 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -17,14 +17,43 @@ SYM_FUNC_START(__asm_copy_to_user) ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V) REG_L t0, riscv_v_usercopy_threshold bltu a2, t0, fallback_scalar_usercopy - tail enter_vector_usercopy + li a3, 1 + tail enter_vector_usercopy #endif -SYM_FUNC_START(fallback_scalar_usercopy) +SYM_FUNC_END(__asm_copy_to_user) +EXPORT_SYMBOL(__asm_copy_to_user) +SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) +EXPORT_SYMBOL(__asm_copy_from_user) +SYM_FUNC_START(fallback_scalar_usercopy) /* Enable access to user memory */ - li t6, SR_SUM - csrs CSR_STATUS, t6 + li t6, SR_SUM + csrs CSR_STATUS, t6 + mv t6, ra + call fallback_scalar_usercopy_sum_enabled + + /* Disable access to user memory */ + mv ra, t6 + li t6, SR_SUM + csrc CSR_STATUS, t6 + ret +SYM_FUNC_END(fallback_scalar_usercopy) + +SYM_FUNC_START(__asm_copy_to_user_sum_enabled) +#ifdef CONFIG_RISCV_ISA_V + ALTERNATIVE("j fallback_scalar_usercopy_sum_enabled", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V) + REG_L t0, riscv_v_usercopy_threshold + bltu a2, t0, fallback_scalar_usercopy_sum_enabled + li a3, 0 + tail enter_vector_usercopy +#endif +SYM_FUNC_END(__asm_copy_to_user_sum_enabled) +SYM_FUNC_ALIAS(__asm_copy_from_user_sum_enabled, __asm_copy_to_user_sum_enabled) +EXPORT_SYMBOL(__asm_copy_from_user_sum_enabled) +EXPORT_SYMBOL(__asm_copy_to_user_sum_enabled) + +SYM_FUNC_START(fallback_scalar_usercopy_sum_enabled) /* * Save the terminal address which will be used to compute the number * of bytes copied in case of a fixup exception. @@ -178,23 +207,12 @@ SYM_FUNC_START(fallback_scalar_usercopy) bltu a0, t0, 4b /* t0 - end of dst */ .Lout_copy_user: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 li a0, 0 ret - - /* Exception fixup code */ 10: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 sub a0, t5, a0 ret -SYM_FUNC_END(__asm_copy_to_user) -SYM_FUNC_END(fallback_scalar_usercopy) -EXPORT_SYMBOL(__asm_copy_to_user) -SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) -EXPORT_SYMBOL(__asm_copy_from_user) - +SYM_FUNC_END(fallback_scalar_usercopy_sum_enabled) SYM_FUNC_START(__clear_user) diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S index 7c45f26de4f7..03b5560609a2 100644 --- a/arch/riscv/lib/uaccess_vector.S +++ b/arch/riscv/lib/uaccess_vector.S @@ -24,7 +24,18 @@ SYM_FUNC_START(__asm_vector_usercopy) /* Enable access to user memory */ li t6, SR_SUM csrs CSR_STATUS, t6 + mv t6, ra + call __asm_vector_usercopy_sum_enabled + + /* Disable access to user memory */ + mv ra, t6 + li t6, SR_SUM + csrc CSR_STATUS, t6 + ret +SYM_FUNC_END(__asm_vector_usercopy) + +SYM_FUNC_START(__asm_vector_usercopy_sum_enabled) loop: vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma fixup vle8.v vData, (pSrc), 10f @@ -36,8 +47,6 @@ loop: /* Exception fixup for vector load is shared with normal exit */ 10: - /* Disable access to user memory */ - csrc CSR_STATUS, t6 mv a0, iNum ret @@ -49,4 +58,4 @@ loop: csrr t2, CSR_VSTART sub iNum, iNum, t2 j 10b -SYM_FUNC_END(__asm_vector_usercopy) +SYM_FUNC_END(__asm_vector_usercopy_sum_enabled) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index b8e96dfff19d..4ca5aafce22e 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -24,7 +24,20 @@ void flush_icache_all(void) if (num_online_cpus() < 2) return; - else if (riscv_use_sbi_for_rfence()) + + /* + * Make sure all previous writes to the D$ are ordered before making + * the IPI. The RISC-V spec states that a hart must execute a data fence + * before triggering a remote fence.i in order to make the modification + * visable for remote harts. + * + * IPIs on RISC-V are triggered by MMIO writes to either CLINT or + * S-IMSIC, so the fence ensures previous data writes "happen before" + * the MMIO. + */ + RISCV_FENCE(w, o); + + if (riscv_use_sbi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); @@ -101,6 +114,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size); unsigned int riscv_cboz_block_size; EXPORT_SYMBOL_GPL(riscv_cboz_block_size); +unsigned int riscv_cbop_block_size; +EXPORT_SYMBOL_GPL(riscv_cbop_block_size); + static void __init cbo_get_block_size(struct device_node *node, const char *name, u32 *block_size, unsigned long *first_hartid) @@ -125,8 +141,8 @@ static void __init cbo_get_block_size(struct device_node *node, void __init riscv_init_cbo_blocksizes(void) { - unsigned long cbom_hartid, cboz_hartid; - u32 cbom_block_size = 0, cboz_block_size = 0; + unsigned long cbom_hartid, cboz_hartid, cbop_hartid; + u32 cbom_block_size = 0, cboz_block_size = 0, cbop_block_size = 0; struct device_node *node; struct acpi_table_header *rhct; acpi_status status; @@ -138,13 +154,15 @@ void __init riscv_init_cbo_blocksizes(void) &cbom_block_size, &cbom_hartid); cbo_get_block_size(node, "riscv,cboz-block-size", &cboz_block_size, &cboz_hartid); + cbo_get_block_size(node, "riscv,cbop-block-size", + &cbop_block_size, &cbop_hartid); } } else { status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); if (ACPI_FAILURE(status)) return; - acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL); + acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, &cbop_block_size); acpi_put_table((struct acpi_table_header *)rhct); } @@ -153,6 +171,9 @@ void __init riscv_init_cbo_blocksizes(void) if (cboz_block_size) riscv_cboz_block_size = cboz_block_size; + + if (cbop_block_size) + riscv_cbop_block_size = cbop_block_size; } #ifdef CONFIG_SMP diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index 4ae67324f992..8b6c0a112a8d 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -154,4 +154,14 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, flush_tlb_mm(vma->vm_mm); return pmd; } + +pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, + pud_t *pudp) +{ + VM_WARN_ON_ONCE(!pud_present(*pudp)); + pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp)); + + flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); + return old; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index f9e27ba1df99..e737ba7949b1 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -7,6 +7,27 @@ #include <linux/mmu_notifier.h> #include <asm/sbi.h> #include <asm/mmu_context.h> +#include <asm/cpufeature.h> + +#define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) + +static inline void local_sfence_inval_ir(void) +{ + asm volatile(SFENCE_INVAL_IR() ::: "memory"); +} + +static inline void local_sfence_w_inval(void) +{ + asm volatile(SFENCE_W_INVAL() ::: "memory"); +} + +static inline void local_sinval_vma(unsigned long vma, unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + asm volatile(SINVAL_VMA(%0, %1) : : "r" (vma), "r" (asid) : "memory"); + else + asm volatile(SINVAL_VMA(%0, zero) : : "r" (vma) : "memory"); +} /* * Flush entire TLB if number of entries to be flushed is greater @@ -27,6 +48,16 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start, return; } + if (has_svinval()) { + local_sfence_w_inval(); + for (i = 0; i < nr_ptes_in_range; ++i) { + local_sinval_vma(start, asid); + start += stride; + } + local_sfence_inval_ir(); + return; + } + for (i = 0; i < nr_ptes_in_range; ++i) { local_flush_tlb_page_asid(start, asid); start += stride; @@ -182,6 +213,13 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), start, end - start, PMD_SIZE); } + +void flush_pud_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + __flush_tlb_range(vma->vm_mm, mm_cpumask(vma->vm_mm), + start, end - start, PUD_SIZE); +} #endif bool arch_tlbbatch_should_defer(struct mm_struct *mm) diff --git a/arch/riscv/tools/relocs_check.sh b/arch/riscv/tools/relocs_check.sh index baeb2e7b2290..742993e6a8cb 100755 --- a/arch/riscv/tools/relocs_check.sh +++ b/arch/riscv/tools/relocs_check.sh @@ -14,7 +14,9 @@ bad_relocs=$( ${srctree}/scripts/relocs_check.sh "$@" | # These relocations are okay # R_RISCV_RELATIVE - grep -F -w -v 'R_RISCV_RELATIVE' + # R_RISCV_NONE + grep -F -w -v 'R_RISCV_RELATIVE +R_RISCV_NONE' ) if [ -z "$bad_relocs" ]; then diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index d229cbd2ba22..9b0d55be1239 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } @@ -60,6 +61,7 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 33711a29618c..6cbbf5e8555f 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -32,6 +32,7 @@ static int sha512_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -57,6 +58,7 @@ static int sha512_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_512; + sctx->first_message_part = 0; return 0; } @@ -97,6 +99,7 @@ static int sha384_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1c661ac62ce8..6d8bc27a366e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -915,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62c0ab4a4b9d..0905fa99a31e 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -265,7 +265,7 @@ static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *r addr = kernel_stack_pointer(regs) + n * sizeof(long); if (!regs_within_kernel_stack(regs, addr)) return 0; - return READ_ONCE_NOCHECK(addr); + return READ_ONCE_NOCHECK(*(unsigned long *)addr); } /** diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index db5f3a3faefb..ea5ed6654050 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -46,7 +46,7 @@ obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o obj-y += diag/ -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index e23670e1949c..21c2e61fece4 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -319,7 +319,7 @@ enum prot_type { PROT_TYPE_DAT = 3, PROT_TYPE_IEP = 4, /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ - PROT_NONE, + PROT_TYPE_DUMMY, }; static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, @@ -335,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { - case PROT_NONE: + case PROT_TYPE_DUMMY: /* We should never get here, acts like termination */ WARN_ON_ONCE(1); break; @@ -805,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, gpa = kvm_s390_real_to_abs(vcpu, ga); if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) { rc = PGM_ADDRESSING; - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; } } if (rc) @@ -963,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc == PGM_PROTECTION) prot = PROT_TYPE_KEYC; else - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); } out_unlock: diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 3829521450dd..e1ad05bfd28a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -441,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); } else { + if (faulthandler_disabled()) + return handle_fault_error_nolock(regs, 0); mm = current->mm; mmap_read_lock(mm); vma = find_vma(mm, addr); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index c7f8313ba449..0c9a35782c83 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -566,7 +566,15 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { memcpy(plt, &bpf_plt, sizeof(*plt)); plt->ret = ret; - plt->target = target; + /* + * (target == NULL) implies that the branch to this PLT entry was + * patched and became a no-op. However, some CPU could have jumped + * to this PLT entry before patching and may be still executing it. + * + * Since the intention in this case is to make the PLT entry a no-op, + * make the target point to the return label instead of NULL. + */ + plt->target = target ?: ret; } /* diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 2fbee3887d13..d930416d4c90 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_RECOVERED: case PCI_ERS_RESULT_NEED_RESET: + case PCI_ERS_RESULT_NONE: return false; default: return true; @@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver) return false; if (!driver->err_handler->error_detected) return false; - if (!driver->err_handler->slot_reset) - return false; - if (!driver->err_handler->resume) - return false; return true; } @@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int rc; + /* The underlying device may have been disabled by the event */ + if (!zdev_enabled(zdev)) + return PCI_ERS_RESULT_NEED_RESET; + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); rc = zpci_reset_load_store_blocked(zdev); if (rc) { @@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } - if (driver->err_handler->mmio_enabled) { + if (driver->err_handler->mmio_enabled) ers_res = driver->err_handler->mmio_enabled(pdev); - if (ers_result_indicates_abort(ers_res)) { - pr_info("%s: Automatic recovery failed after MMIO re-enable\n", - pci_name(pdev)); - return ers_res; - } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { - pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); - return ers_res; - } + else + ers_res = PCI_ERS_RESULT_NONE; + + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; } pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); @@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, return ers_res; } pdev->error_state = pci_channel_io_normal; - ers_res = driver->err_handler->slot_reset(pdev); + + if (driver->err_handler->slot_reset) + ers_res = driver->err_handler->slot_reset(pdev); + else + ers_res = PCI_ERS_RESULT_NONE; + if (ers_result_indicates_abort(ers_res)) { pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); return ers_res; @@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) goto out_unlock; } - if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + if (ers_res != PCI_ERS_RESULT_NEED_RESET) { ers_res = zpci_event_do_error_state_clear(pdev, driver); if (ers_result_indicates_abort(ers_res)) { status_str = "failed (abort on MMIO enable)"; @@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res == PCI_ERS_RESULT_NEED_RESET) ers_res = zpci_event_do_reset(pdev, driver); + /* + * ers_res can be PCI_ERS_RESULT_NONE either because the driver + * decided to return it, indicating that it abstains from voting + * on how to recover, or because it didn't implement the callback. + * Both cases assume, that if there is nothing else causing a + * disconnect, we recovered successfully. + */ + if (ers_res == PCI_ERS_RESULT_NONE) + ers_res = PCI_ERS_RESULT_RECOVERED; + if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); @@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; pci_ers_result_t ers_res; + u32 fh = 0; + int rc; zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); @@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (zdev) { mutex_lock(&zdev->state_lock); + rc = clp_refresh_fh(zdev->fid, &fh); + if (rc) + goto no_pdev; + if (!fh || ccdf->fh != fh) { + /* Ignore events with stale handles */ + zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n", + ccdf->fid, fh, ccdf->fh); + goto no_pdev; + } zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 6f13557eecd6..a641e26f8fdf 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -825,6 +825,7 @@ static struct spi_board_info spi_bus[] = { /* MSIOF0 */ static struct sh_msiof_spi_info msiof0_data = { .num_chipselect = 1, + .mode = MSIOF_SPI_HOST, }; static struct resource msiof0_resources[] = { diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c index 24391b444b28..42103038a7d0 100644 --- a/arch/sh/drivers/heartbeat.c +++ b/arch/sh/drivers/heartbeat.c @@ -58,7 +58,7 @@ static inline void heartbeat_toggle_bit(struct heartbeat_data *hd, static void heartbeat_timer(struct timer_list *t) { - struct heartbeat_data *hd = from_timer(hd, t, timer); + struct heartbeat_data *hd = timer_container_of(hd, t, timer); static unsigned bit = 0, up = 1; heartbeat_toggle_bit(hd, bit, hd->flags & HEARTBEAT_INVERTED); diff --git a/arch/sh/drivers/pci/common.c b/arch/sh/drivers/pci/common.c index 5442475d132e..9633b6147a05 100644 --- a/arch/sh/drivers/pci/common.c +++ b/arch/sh/drivers/pci/common.c @@ -88,7 +88,7 @@ int __init pci_is_66mhz_capable(struct pci_channel *hose, static void pcibios_enable_err(struct timer_list *t) { - struct pci_channel *hose = from_timer(hose, t, err_timer); + struct pci_channel *hose = timer_container_of(hose, t, err_timer); timer_delete(&hose->err_timer); printk(KERN_DEBUG "PCI: re-enabling error IRQ.\n"); @@ -97,7 +97,7 @@ static void pcibios_enable_err(struct timer_list *t) static void pcibios_enable_serr(struct timer_list *t) { - struct pci_channel *hose = from_timer(hose, t, serr_timer); + struct pci_channel *hose = timer_container_of(hose, t, serr_timer); timer_delete(&hose->serr_timer); printk(KERN_DEBUG "PCI: re-enabling system error IRQ.\n"); diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c index 2b51ad9d5586..443cc6fd26a8 100644 --- a/arch/sh/drivers/push-switch.c +++ b/arch/sh/drivers/push-switch.c @@ -25,7 +25,7 @@ static DEVICE_ATTR_RO(switch); static void switch_timer(struct timer_list *t) { - struct push_switch *psw = from_timer(psw, t, debounce); + struct push_switch *psw = timer_container_of(psw, t, debounce); schedule_work(&psw->work); } diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h index b38dbc975581..e7ac9c950275 100644 --- a/arch/sh/include/asm/cache.h +++ b/arch/sh/include/asm/cache.h @@ -22,7 +22,7 @@ #define __read_mostly __section(".data..read_mostly") -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct cache_info { unsigned int ways; /* Number of cache ways */ unsigned int sets; /* Number of cache sets */ @@ -48,5 +48,5 @@ struct cache_info { unsigned long flags; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SH_CACHE_H */ diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h index 571954474122..f46d18b84833 100644 --- a/arch/sh/include/asm/dwarf.h +++ b/arch/sh/include/asm/dwarf.h @@ -189,7 +189,7 @@ */ #define DWARF_ARCH_RA_REG 17 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/bug.h> @@ -379,7 +379,7 @@ extern int module_dwarf_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); extern void module_dwarf_cleanup(struct module *); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc @@ -402,7 +402,7 @@ extern void module_dwarf_cleanup(struct module *); #define CFI_REL_OFFSET CFI_IGNORE #define CFI_UNDEFINED CFI_IGNORE -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static inline void dwarf_unwinder_init(void) { } diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index 0379f4cce5ed..a086e38b70ee 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -2,7 +2,7 @@ #ifndef __ASM_SH_FPU_H #define __ASM_SH_FPU_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/ptrace.h> @@ -67,6 +67,6 @@ static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs) void float_raise(unsigned int flags); int float_rounding_mode(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SH_FPU_H */ diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index 1c10e1066390..d35781ab716e 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -7,7 +7,7 @@ #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ #define FTRACE_SYSCALL_MAX NR_syscalls -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void mcount(void); #define MCOUNT_ADDR ((unsigned long)(mcount)) @@ -35,10 +35,10 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* arch/sh/kernel/return_address.c */ extern void *return_address(unsigned int); @@ -53,6 +53,6 @@ static inline void arch_ftrace_nmi_enter(void) { } static inline void arch_ftrace_nmi_exit(void) { } #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SH_FTRACE_H */ diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h index 172e329fd92d..b9c9f91e6616 100644 --- a/arch/sh/include/asm/mmu.h +++ b/arch/sh/include/asm/mmu.h @@ -33,7 +33,7 @@ #define PMB_NO_ENTRY (-1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/errno.h> #include <linux/threads.h> #include <asm/page.h> @@ -102,6 +102,6 @@ pmb_remap(phys_addr_t phys, unsigned long size, pgprot_t prot) return pmb_remap_caller(phys, size, prot, __builtin_return_address(0)); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __MMU_H */ diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 3990cbd9aa04..def4205491ec 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -30,7 +30,7 @@ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT) #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/uncached.h> extern unsigned long shm_align_mask; @@ -85,7 +85,7 @@ typedef struct page *pgtable_t; #define pte_pgprot(x) __pgprot(pte_val(x) & PTE_FLAGS_MASK) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * __MEMORY_START and SIZE are the physical addresses and size of RAM. @@ -126,10 +126,10 @@ typedef struct page *pgtable_t; #define ___va(x) ((x)+PAGE_OFFSET) #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __pa(x) ___pa((unsigned long)x) #define __va(x) (void *)___va((unsigned long)x) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #ifdef CONFIG_UNCACHED_MAPPING #if defined(CONFIG_29BIT) diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 729f5c6225fb..10fa8f2bb8d1 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -17,7 +17,7 @@ #include <asm/page.h> #include <asm/mmu.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/addrspace.h> #include <asm/fixmap.h> @@ -28,7 +28,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Effective and physical address definitions, to aid with sign diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h index db2e48366e0d..5f51af18997b 100644 --- a/arch/sh/include/asm/pgtable_32.h +++ b/arch/sh/include/asm/pgtable_32.h @@ -170,7 +170,7 @@ static inline unsigned long copy_ptea_attributes(unsigned long x) (PTE_MASK | _PAGE_ACCESSED | _PAGE_CACHABLE | \ _PAGE_DIRTY | _PAGE_SPECIAL) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if defined(CONFIG_X2TLB) /* SH-X2 TLB */ #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_CACHABLE | \ @@ -287,9 +287,9 @@ static inline unsigned long copy_ptea_attributes(unsigned long x) __pgprot(0) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Certain architectures need to do special things when PTEs @@ -470,7 +470,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* In both cases, we borrow bit 6 to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE _PAGE_USER -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte.pte_low & _PAGE_SWP_EXCLUSIVE; } @@ -478,5 +478,5 @@ static inline int pte_swp_exclusive(pte_t pte) PTE_BIT_FUNC(low, swp_mkexclusive, |= _PAGE_SWP_EXCLUSIVE); PTE_BIT_FUNC(low, swp_clear_exclusive, &= ~_PAGE_SWP_EXCLUSIVE); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SH_PGTABLE_32_H */ diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 73fba7c922f9..2a0b5713ab80 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -5,7 +5,7 @@ #include <asm/cpu-features.h> #include <asm/cache.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * CPU type and hardware bug flags. Kept separately for each CPU. * @@ -168,7 +168,7 @@ extern unsigned int instruction_size(unsigned int insn); void select_idle_routine(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include <asm/processor_32.h> diff --git a/arch/sh/include/asm/smc37c93x.h b/arch/sh/include/asm/smc37c93x.h index 891f2f8f2fd0..caf4cd8dd241 100644 --- a/arch/sh/include/asm/smc37c93x.h +++ b/arch/sh/include/asm/smc37c93x.h @@ -67,7 +67,7 @@ #define UART_DLL 0x0 /* Divisor Latch (LS) */ #define UART_DLM 0x2 /* Divisor Latch (MS) */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef struct uart_reg { volatile __u16 rbr; volatile __u16 ier; @@ -78,7 +78,7 @@ typedef struct uart_reg { volatile __u16 msr; volatile __u16 scr; } uart_reg; -#endif /* ! __ASSEMBLY__ */ +#endif /* ! __ASSEMBLER__ */ /* Alias for Write Only Register */ diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h index 47db17520261..0f991babc559 100644 --- a/arch/sh/include/asm/suspend.h +++ b/arch/sh/include/asm/suspend.h @@ -2,7 +2,7 @@ #ifndef _ASM_SH_SUSPEND_H #define _ASM_SH_SUSPEND_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/notifier.h> #include <asm/ptrace.h> diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 9f19a682d315..471db5173036 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -21,7 +21,7 @@ #define FAULT_CODE_PROT (1 << 3) /* protection fault */ #define FAULT_CODE_USER (1 << 4) /* user-mode access */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> struct thread_info { @@ -49,7 +49,7 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -86,7 +86,7 @@ static inline struct thread_info *current_thread_info(void) extern void init_thread_xstate(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * Thread information flags @@ -144,7 +144,7 @@ extern void init_thread_xstate(void); */ #define TS_USEDFPU 0x0002 /* FPU used by this task this quantum */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define TI_FLAG_FAULT_CODE_SHIFT 24 @@ -164,5 +164,5 @@ static inline unsigned int get_thread_fault_code(void) return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_SH_THREAD_INFO_H */ diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index ddf324bfb9a0..39df40d0ebc2 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -2,7 +2,7 @@ #ifndef __ASM_SH_TLB_H #define __ASM_SH_TLB_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/pagemap.h> #include <asm-generic/tlb.h> @@ -29,5 +29,5 @@ asmlinkage int handle_tlbmiss(struct pt_regs *regs, unsigned long error_code, unsigned long address); #endif /* CONFIG_MMU */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SH_TLB_H */ diff --git a/arch/sh/include/asm/types.h b/arch/sh/include/asm/types.h index 9b3fc923ee28..fec3e89df0b1 100644 --- a/arch/sh/include/asm/types.h +++ b/arch/sh/include/asm/types.h @@ -7,10 +7,10 @@ /* * These aren't exported outside the kernel to avoid name space clashes */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef u16 insn_size_t; typedef u32 reg_size_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_SH_TYPES_H */ diff --git a/arch/sh/include/mach-common/mach/romimage.h b/arch/sh/include/mach-common/mach/romimage.h index 1915714263aa..22fb47ec9b15 100644 --- a/arch/sh/include/mach-common/mach/romimage.h +++ b/arch/sh/include/mach-common/mach/romimage.h @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* do nothing here by default */ -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ static inline void mmcif_update_progress(int nr) { } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ diff --git a/arch/sh/include/mach-ecovec24/mach/romimage.h b/arch/sh/include/mach-ecovec24/mach/romimage.h index 2da6ff326cbd..f93d494736c3 100644 --- a/arch/sh/include/mach-ecovec24/mach/romimage.h +++ b/arch/sh/include/mach-ecovec24/mach/romimage.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* EcoVec board specific boot code: * converts the "partner-jet-script.txt" script into assembly @@ -22,7 +22,7 @@ 1 : .long 0xa8000000 2 : -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ /* Ecovec board specific information: * @@ -45,4 +45,4 @@ static inline void mmcif_update_progress(int nr) __raw_writeb(1 << (nr - 1), PGDR); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ diff --git a/arch/sh/include/mach-kfr2r09/mach/romimage.h b/arch/sh/include/mach-kfr2r09/mach/romimage.h index 209275872ff0..f68bb480d378 100644 --- a/arch/sh/include/mach-kfr2r09/mach/romimage.h +++ b/arch/sh/include/mach-kfr2r09/mach/romimage.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* kfr2r09 board specific boot code: * converts the "partner-jet-script.txt" script into assembly @@ -22,10 +22,10 @@ 1: .long 0xa8000000 2: -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ static inline void mmcif_update_progress(int nr) { } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 7b453592adaf..5ef123bc63f8 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the Linux/SuperH kernel. # -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c index 49c4ffd782d6..a250fb1b9420 100644 --- a/arch/sh/kernel/kprobes.c +++ b/arch/sh/kernel/kprobes.c @@ -404,13 +404,10 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { - struct kprobe *p = NULL; struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - kprobe_opcode_t *addr = NULL; struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - addr = (kprobe_opcode_t *) (args->regs->pc); if (val == DIE_TRAP && args->trapnr == (BREAKPOINT_INSTRUCTION & 0xff)) { if (!kprobe_running()) { @@ -421,7 +418,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_DONE; } } else { - p = get_kprobe(addr); if ((kcb->kprobe_status == KPROBE_HIT_SS) || (kcb->kprobe_status == KPROBE_REENTER)) { if (post_kprobe_handler(args->regs)) diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 1454ebe91539..7c199c003ffe 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -348,7 +348,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & SRMMU_SWP_EXCLUSIVE; } diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 4af03e3c161b..669cd02469a1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -1023,7 +1023,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 3453f330e363..36f2727e1445 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -9,7 +9,7 @@ asflags-y := -ansi # Undefine sparc when processing vmlinux.lds - it is used # And teach CPP we are doing $(BITS) builds (for this case) CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS) -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index e27afd233bf5..8fb2e7ca5015 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -804,7 +804,7 @@ EXPORT_SYMBOL(vio_port_up); static void vio_port_timer(struct timer_list *t) { - struct vio_driver_state *vio = from_timer(vio, t, timer); + struct vio_driver_state *vio = timer_container_of(vio, t, timer); vio_port_up(vio); } diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 79509c7f39de..f08e8a7fac93 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -52,13 +52,7 @@ config NO_IOMEM config UML_IOMEM_EMULATION bool select INDIRECT_IOMEM - select HAS_IOPORT select GENERIC_PCI_IOMAP - select GENERIC_IOMAP - select NO_GENERIC_PCI_IOPORT_MAP - -config NO_IOPORT_MAP - def_bool !UML_IOMEM_EMULATION config ISA bool diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index 1ffa088739f4..29d9666eceae 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -52,13 +52,6 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y # CONFIG_IPV6 is not set -CONFIG_UML_NET=y -CONFIG_UML_NET_ETHERTAP=y -CONFIG_UML_NET_TUNTAP=y -CONFIG_UML_NET_SLIP=y -CONFIG_UML_NET_DAEMON=y -CONFIG_UML_NET_MCAST=y -CONFIG_UML_NET_SLIRP=y CONFIG_EXT4_FS=y CONFIG_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig index 03b10d3f6816..cf309c5406a2 100644 --- a/arch/um/configs/x86_64_defconfig +++ b/arch/um/configs/x86_64_defconfig @@ -51,13 +51,6 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y # CONFIG_IPV6 is not set -CONFIG_UML_NET=y -CONFIG_UML_NET_ETHERTAP=y -CONFIG_UML_NET_TUNTAP=y -CONFIG_UML_NET_SLIP=y -CONFIG_UML_NET_DAEMON=y -CONFIG_UML_NET_MCAST=y -CONFIG_UML_NET_SLIRP=y CONFIG_EXT4_FS=y CONFIG_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 9cb196070614..34085bfc6d41 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -124,206 +124,18 @@ endmenu menu "UML Network Devices" depends on NET -# UML virtual driver -config UML_NET - bool "Virtual network device" - help - While the User-Mode port cannot directly talk to any physical - hardware devices, this choice and the following transport options - provide one or more virtual network devices through which the UML - kernels can talk to each other, the host, and with the host's help, - machines on the outside world. - - For more information, including explanations of the networking and - sample configurations, see - <http://user-mode-linux.sourceforge.net/old/networking.html>. - - If you'd like to be able to enable networking in the User-Mode - linux environment, say Y; otherwise say N. Note that you must - enable at least one of the following transport options to actually - make use of UML networking. - -config UML_NET_ETHERTAP - bool "Ethertap transport (obsolete)" - depends on UML_NET - help - The Ethertap User-Mode Linux network transport allows a single - running UML to exchange packets with its host over one of the - host's Ethertap devices, such as /dev/tap0. Additional running - UMLs can use additional Ethertap devices, one per running UML. - While the UML believes it's on a (multi-device, broadcast) virtual - Ethernet network, it's in fact communicating over a point-to-point - link with the host. - - To use this, your host kernel must have support for Ethertap - devices. Also, if your host kernel is 2.4.x, it must have - CONFIG_NETLINK_DEV configured as Y or M. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Ethertap - networking. - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - -config UML_NET_TUNTAP - bool "TUN/TAP transport (obsolete)" - depends on UML_NET - help - The UML TUN/TAP network transport allows a UML instance to exchange - packets with the host over a TUN/TAP device. This option will only - work with a 2.4 host, unless you've applied the TUN/TAP patch to - your 2.2 host kernel. - - To use this transport, your host kernel must have support for TUN/TAP - devices, either built-in or as a module. - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - -config UML_NET_SLIP - bool "SLIP transport (obsolete)" - depends on UML_NET - help - The slip User-Mode Linux network transport allows a running UML to - network with its host over a point-to-point link. Unlike Ethertap, - which can carry any Ethernet frame (and hence even non-IP packets), - the slip transport can only carry IP packets. - - To use this, your host must support slip devices. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html>. - has examples of the UML command line to use to enable slip - networking, and details of a few quirks with it. - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - -config UML_NET_DAEMON - bool "Daemon transport (obsolete)" - depends on UML_NET - help - This User-Mode Linux network transport allows one or more running - UMLs on a single host to communicate with each other, but not to - the host. - - To use this form of networking, you'll need to run the UML - networking daemon on the host. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Daemon - networking. - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - -config UML_NET_DAEMON_DEFAULT_SOCK - string "Default socket for daemon transport" - default "/tmp/uml.ctl" - depends on UML_NET_DAEMON - help - This option allows setting the default socket for the daemon - transport, normally it defaults to /tmp/uml.ctl. - config UML_NET_VECTOR bool "Vector I/O high performance network devices" - depends on UML_NET select MAY_HAVE_RUNTIME_DEPS help This User-Mode Linux network driver uses multi-message send and receive functions. The host running the UML guest must have a linux kernel version above 3.0 and a libc version > 2.13. - This driver provides tap, raw, gre and l2tpv3 network transports - with up to 4 times higher network throughput than the UML network - drivers. - -config UML_NET_VDE - bool "VDE transport (obsolete)" - depends on UML_NET - depends on !MODVERSIONS - select MAY_HAVE_RUNTIME_DEPS - help - This User-Mode Linux network transport allows one or more running - UMLs on a single host to communicate with each other and also - with the rest of the world using Virtual Distributed Ethernet, - an improved fork of uml_switch. + This driver provides tap, raw, gre and l2tpv3 network transports. - You must have libvdeplug installed in order to build the vde - transport into UML. - - To use this form of networking, you will need to run vde_switch - on the host. - - For more information, see <http://wiki.virtualsquare.org/> - That site has a good overview of what VDE is and also examples - of the UML command line to use to enable VDE networking. - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - -config UML_NET_MCAST - bool "Multicast transport (obsolete)" - depends on UML_NET - help - This Multicast User-Mode Linux network transport allows multiple - UMLs (even ones running on different host machines!) to talk to - each other over a virtual ethernet network. However, it requires - at least one UML with one of the other transports to act as a - bridge if any of them need to be able to talk to their hosts or any - other IP machines. - - To use this, your host kernel(s) must support IP Multicasting. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Multicast - networking, and notes about the security of this approach. - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - -config UML_NET_SLIRP - bool "SLiRP transport (obsolete)" - depends on UML_NET - help - The SLiRP User-Mode Linux network transport allows a running UML - to network by invoking a program that can handle SLIP encapsulated - packets. This is commonly (but not limited to) the application - known as SLiRP, a program that can re-socket IP packets back onto - he host on which it is run. Only IP packets are supported, - unlike other network transports that can handle all Ethernet - frames. In general, slirp allows the UML the same IP connectivity - to the outside world that the host user is permitted, and unlike - other transports, SLiRP works without the need of root level - privileges, setuid binaries, or SLIP devices on the host. This - also means not every type of connection is possible, but most - situations can be accommodated with carefully crafted slirp - commands that can be passed along as part of the network device's - setup string. The effect of this transport on the UML is similar - that of a host behind a firewall that masquerades all network - connections passing through it (but is less secure). - - NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please - migrate to UML_NET_VECTOR. - - If unsure, say N. - - Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" + For more information, including explanations of the networking + and sample configurations, see + <file:Documentation/virt/uml/user_mode_linux_howto_v2.rst>. endmenu @@ -367,3 +179,11 @@ config UML_PCI_OVER_VIRTIO_DEVICE_ID There's no official device ID assigned (yet), set the one you wish to use for experimentation here. The default of -1 is not valid and will cause the driver to fail at probe. + +config UML_PCI_OVER_VFIO + bool "Enable VFIO-based PCI passthrough" + select UML_PCI + help + This driver provides support for VFIO-based PCI passthrough. + Currently, only MSI-X capable devices are supported, and it + is assumed that drivers will use MSI-X. diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 0a5820343ad3..6bf8cbf71d3c 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -6,12 +6,7 @@ # pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked # in to pcap.o -slip-objs := slip_kern.o slip_user.o -slirp-objs := slirp_kern.o slirp_user.o -daemon-objs := daemon_kern.o daemon_user.o vector-objs := vector_kern.o vector_user.o vector_transports.o -umcast-objs := umcast_kern.o umcast_user.o -net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o ubd-objs := ubd_kern.o ubd_user.o @@ -19,13 +14,7 @@ port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o rtc-objs := rtc_kern.o rtc_user.o - -LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a) - -targets := vde_kern.o vde_user.o - -$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o - $(LD) -r -dp -o $@ $^ $(ld_flags) +vfio_uml-objs := vfio_kern.o vfio_user.o #XXX: The call below does not work because the flags are added before the # object name, so nothing from the library gets linked. @@ -38,13 +27,7 @@ obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o obj-$(CONFIG_SSL) += ssl.o obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o -obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o -obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o -obj-$(CONFIG_UML_NET_DAEMON) += daemon.o obj-$(CONFIG_UML_NET_VECTOR) += vector.o -obj-$(CONFIG_UML_NET_VDE) += vde.o -obj-$(CONFIG_UML_NET_MCAST) += umcast.o -obj-$(CONFIG_UML_NET) += net.o obj-$(CONFIG_MCONSOLE) += mconsole.o obj-$(CONFIG_MMAPPER) += mmapper_kern.o obj-$(CONFIG_BLK_DEV_UBD) += ubd.o @@ -62,9 +45,10 @@ obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o obj-$(CONFIG_UML_RTC) += rtc.o obj-$(CONFIG_UML_PCI) += virt-pci.o obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o +obj-$(CONFIG_UML_PCI_OVER_VFIO) += vfio_uml.o # pcap_user.o must be added explicitly. -USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o +USER_OBJS := fd.o null.o pty.o tty.o xterm.o vector_user.o CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"' diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index e78a99816c86..26442db7d608 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -212,7 +212,7 @@ int enable_chan(struct line *line) * be permanently disabled. This is discovered in IRQ context, but * the freeing of the IRQ must be done later. */ -static DEFINE_SPINLOCK(irqs_to_free_lock); +static DEFINE_RAW_SPINLOCK(irqs_to_free_lock); static LIST_HEAD(irqs_to_free); void free_irqs(void) @@ -222,9 +222,9 @@ void free_irqs(void) struct list_head *ele; unsigned long flags; - spin_lock_irqsave(&irqs_to_free_lock, flags); + raw_spin_lock_irqsave(&irqs_to_free_lock, flags); list_splice_init(&irqs_to_free, &list); - spin_unlock_irqrestore(&irqs_to_free_lock, flags); + raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags); list_for_each(ele, &list) { chan = list_entry(ele, struct chan, free_list); @@ -246,9 +246,9 @@ static void close_one_chan(struct chan *chan, int delay_free_irq) return; if (delay_free_irq) { - spin_lock_irqsave(&irqs_to_free_lock, flags); + raw_spin_lock_irqsave(&irqs_to_free_lock, flags); list_add(&chan->free_list, &irqs_to_free); - spin_unlock_irqrestore(&irqs_to_free_lock, flags); + raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags); } else { if (chan->input && chan->enabled) um_free_irq(chan->line->read_irq, chan); diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h deleted file mode 100644 index 1509cc7eb907..000000000000 --- a/arch/um/drivers/daemon.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __DAEMON_H__ -#define __DAEMON_H__ - -#include <net_user.h> - -#define SWITCH_VERSION 3 - -struct daemon_data { - char *sock_type; - char *ctl_sock; - void *ctl_addr; - void *data_addr; - void *local_addr; - int fd; - int control; - void *dev; -}; - -extern const struct net_user_info daemon_user_info; - -extern int daemon_user_write(int fd, void *buf, int len, - struct daemon_data *pri); - -#endif diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c deleted file mode 100644 index afde1e82c056..000000000000 --- a/arch/um/drivers/daemon_kern.c +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include "daemon.h" - -struct daemon_init { - char *sock_type; - char *ctl_sock; -}; - -static void daemon_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct daemon_data *dpri; - struct daemon_init *init = data; - - pri = netdev_priv(dev); - dpri = (struct daemon_data *) pri->user; - dpri->sock_type = init->sock_type; - dpri->ctl_sock = init->ctl_sock; - dpri->fd = -1; - dpri->control = -1; - dpri->dev = dev; - /* We will free this pointer. If it contains crap we're burned. */ - dpri->ctl_addr = NULL; - dpri->data_addr = NULL; - dpri->local_addr = NULL; - - printk("daemon backend (uml_switch version %d) - %s:%s", - SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock); - printk("\n"); -} - -static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_recvfrom(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); -} - -static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return daemon_user_write(fd, skb->data, skb->len, - (struct daemon_data *) &lp->user); -} - -static const struct net_kern_info daemon_kern_info = { - .init = daemon_init, - .protocol = eth_protocol, - .read = daemon_read, - .write = daemon_write, -}; - -static int daemon_setup(char *str, char **mac_out, void *data) -{ - struct daemon_init *init = data; - char *remain; - - *init = ((struct daemon_init) - { .sock_type = "unix", - .ctl_sock = CONFIG_UML_NET_DAEMON_DEFAULT_SOCK }); - - remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock, - NULL); - if (remain != NULL) - printk(KERN_WARNING "daemon_setup : Ignoring data socket " - "specification\n"); - - return 1; -} - -static struct transport daemon_transport = { - .list = LIST_HEAD_INIT(daemon_transport.list), - .name = "daemon", - .setup = daemon_setup, - .user = &daemon_user_info, - .kern = &daemon_kern_info, - .private_size = sizeof(struct daemon_data), - .setup_size = sizeof(struct daemon_init), -}; - -static int register_daemon(void) -{ - register_transport(&daemon_transport); - return 0; -} - -late_initcall(register_daemon); diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c deleted file mode 100644 index 785baedc3555..000000000000 --- a/arch/um/drivers/daemon_user.c +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <stdint.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/time.h> -#include <sys/un.h> -#include "daemon.h" -#include <net_user.h> -#include <os.h> -#include <um_malloc.h> - -enum request_type { REQ_NEW_CONTROL }; - -#define SWITCH_MAGIC 0xfeedface - -struct request_v3 { - uint32_t magic; - uint32_t version; - enum request_type type; - struct sockaddr_un sock; -}; - -static struct sockaddr_un *new_addr(void *name, int len) -{ - struct sockaddr_un *sun; - - sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); - if (sun == NULL) { - printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un " - "failed\n"); - return NULL; - } - sun->sun_family = AF_UNIX; - memcpy(sun->sun_path, name, len); - return sun; -} - -static int connect_to_switch(struct daemon_data *pri) -{ - struct sockaddr_un *ctl_addr = pri->ctl_addr; - struct sockaddr_un *local_addr = pri->local_addr; - struct sockaddr_un *sun; - struct request_v3 req; - int fd, n, err; - - pri->control = socket(AF_UNIX, SOCK_STREAM, 0); - if (pri->control < 0) { - err = -errno; - printk(UM_KERN_ERR "daemon_open : control socket failed, " - "errno = %d\n", -err); - return err; - } - - if (connect(pri->control, (struct sockaddr *) ctl_addr, - sizeof(*ctl_addr)) < 0) { - err = -errno; - printk(UM_KERN_ERR "daemon_open : control connect failed, " - "errno = %d\n", -err); - goto out; - } - - fd = socket(AF_UNIX, SOCK_DGRAM, 0); - if (fd < 0) { - err = -errno; - printk(UM_KERN_ERR "daemon_open : data socket failed, " - "errno = %d\n", -err); - goto out; - } - if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) { - err = -errno; - printk(UM_KERN_ERR "daemon_open : data bind failed, " - "errno = %d\n", -err); - goto out_close; - } - - sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL); - if (sun == NULL) { - printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un " - "failed\n"); - err = -ENOMEM; - goto out_close; - } - - req.magic = SWITCH_MAGIC; - req.version = SWITCH_VERSION; - req.type = REQ_NEW_CONTROL; - req.sock = *local_addr; - n = write(pri->control, &req, sizeof(req)); - if (n != sizeof(req)) { - printk(UM_KERN_ERR "daemon_open : control setup request " - "failed, err = %d\n", -errno); - err = -ENOTCONN; - goto out_free; - } - - n = read(pri->control, sun, sizeof(*sun)); - if (n != sizeof(*sun)) { - printk(UM_KERN_ERR "daemon_open : read of data socket failed, " - "err = %d\n", -errno); - err = -ENOTCONN; - goto out_free; - } - - pri->data_addr = sun; - return fd; - - out_free: - kfree(sun); - out_close: - close(fd); - out: - close(pri->control); - return err; -} - -static int daemon_user_init(void *data, void *dev) -{ - struct daemon_data *pri = data; - struct timeval tv; - struct { - char zero; - int pid; - int usecs; - } name; - - if (!strcmp(pri->sock_type, "unix")) - pri->ctl_addr = new_addr(pri->ctl_sock, - strlen(pri->ctl_sock) + 1); - name.zero = 0; - name.pid = os_getpid(); - gettimeofday(&tv, NULL); - name.usecs = tv.tv_usec; - pri->local_addr = new_addr(&name, sizeof(name)); - pri->dev = dev; - pri->fd = connect_to_switch(pri); - if (pri->fd < 0) { - kfree(pri->local_addr); - pri->local_addr = NULL; - return pri->fd; - } - - return 0; -} - -static int daemon_open(void *data) -{ - struct daemon_data *pri = data; - return pri->fd; -} - -static void daemon_remove(void *data) -{ - struct daemon_data *pri = data; - - close(pri->fd); - pri->fd = -1; - close(pri->control); - pri->control = -1; - - kfree(pri->data_addr); - pri->data_addr = NULL; - kfree(pri->ctl_addr); - pri->ctl_addr = NULL; - kfree(pri->local_addr); - pri->local_addr = NULL; -} - -int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) -{ - struct sockaddr_un *data_addr = pri->data_addr; - - return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)); -} - -const struct net_user_info daemon_user_info = { - .init = daemon_user_init, - .open = daemon_open, - .close = NULL, - .remove = daemon_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c deleted file mode 100644 index d5a9c5aabaec..000000000000 --- a/arch/um/drivers/net_kern.c +++ /dev/null @@ -1,889 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <linux/memblock.h> -#include <linux/etherdevice.h> -#include <linux/ethtool.h> -#include <linux/inetdevice.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/platform_device.h> -#include <linux/rtnetlink.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <init.h> -#include <irq_kern.h> -#include <irq_user.h> -#include "mconsole_kern.h" -#include <net_kern.h> -#include <net_user.h> - -#define DRIVER_NAME "uml-netdev" - -static DEFINE_SPINLOCK(opened_lock); -static LIST_HEAD(opened); - -/* - * The drop_skb is used when we can't allocate an skb. The - * packet is read into drop_skb in order to get the data off the - * connection to the host. - * It is reallocated whenever a maximum packet size is seen which is - * larger than any seen before. update_drop_skb is called from - * eth_configure when a new interface is added. - */ -static DEFINE_SPINLOCK(drop_lock); -static struct sk_buff *drop_skb; -static int drop_max; - -static int update_drop_skb(int max) -{ - struct sk_buff *new; - unsigned long flags; - int err = 0; - - spin_lock_irqsave(&drop_lock, flags); - - if (max <= drop_max) - goto out; - - err = -ENOMEM; - new = dev_alloc_skb(max); - if (new == NULL) - goto out; - - skb_put(new, max); - - kfree_skb(drop_skb); - drop_skb = new; - drop_max = max; - err = 0; -out: - spin_unlock_irqrestore(&drop_lock, flags); - - return err; -} - -static int uml_net_rx(struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - int pkt_len; - struct sk_buff *skb; - - /* If we can't allocate memory, try again next round. */ - skb = dev_alloc_skb(lp->max_packet); - if (skb == NULL) { - drop_skb->dev = dev; - /* Read a packet into drop_skb and don't do anything with it. */ - (*lp->read)(lp->fd, drop_skb, lp); - dev->stats.rx_dropped++; - return 0; - } - - skb->dev = dev; - skb_put(skb, lp->max_packet); - skb_reset_mac_header(skb); - pkt_len = (*lp->read)(lp->fd, skb, lp); - - if (pkt_len > 0) { - skb_trim(skb, pkt_len); - skb->protocol = (*lp->protocol)(skb); - - dev->stats.rx_bytes += skb->len; - dev->stats.rx_packets++; - netif_rx(skb); - return pkt_len; - } - - kfree_skb(skb); - return pkt_len; -} - -static void uml_dev_close(struct work_struct *work) -{ - struct uml_net_private *lp = - container_of(work, struct uml_net_private, work); - dev_close(lp->dev); -} - -static irqreturn_t uml_net_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct uml_net_private *lp = netdev_priv(dev); - int err; - - if (!netif_running(dev)) - return IRQ_NONE; - - spin_lock(&lp->lock); - while ((err = uml_net_rx(dev)) > 0) ; - if (err < 0) { - printk(KERN_ERR - "Device '%s' read returned %d, shutting it down\n", - dev->name, err); - /* dev_close can't be called in interrupt context, and takes - * again lp->lock. - * And dev_close() can be safely called multiple times on the - * same device, since it tests for (dev->flags & IFF_UP). So - * there's no harm in delaying the device shutdown. - * Furthermore, the workqueue will not re-enqueue an already - * enqueued work item. */ - schedule_work(&lp->work); - goto out; - } -out: - spin_unlock(&lp->lock); - return IRQ_HANDLED; -} - -static int uml_net_open(struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - int err; - - if (lp->fd >= 0) { - err = -ENXIO; - goto out; - } - - lp->fd = (*lp->open)(&lp->user); - if (lp->fd < 0) { - err = lp->fd; - goto out; - } - - err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt, - IRQF_SHARED, dev->name, dev); - if (err < 0) { - printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); - err = -ENETUNREACH; - goto out_close; - } - - netif_start_queue(dev); - - /* clear buffer - it can happen that the host side of the interface - * is full when we get here. In this case, new data is never queued, - * SIGIOs never arrive, and the net never works. - */ - while ((err = uml_net_rx(dev)) > 0) ; - - spin_lock(&opened_lock); - list_add(&lp->list, &opened); - spin_unlock(&opened_lock); - - return 0; -out_close: - if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user); - lp->fd = -1; -out: - return err; -} - -static int uml_net_close(struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - - netif_stop_queue(dev); - - um_free_irq(dev->irq, dev); - if (lp->close != NULL) - (*lp->close)(lp->fd, &lp->user); - lp->fd = -1; - - spin_lock(&opened_lock); - list_del(&lp->list); - spin_unlock(&opened_lock); - - return 0; -} - -static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct uml_net_private *lp = netdev_priv(dev); - unsigned long flags; - int len; - - netif_stop_queue(dev); - - spin_lock_irqsave(&lp->lock, flags); - - len = (*lp->write)(lp->fd, skb, lp); - skb_tx_timestamp(skb); - - if (len == skb->len) { - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - netif_trans_update(dev); - netif_start_queue(dev); - - /* this is normally done in the interrupt when tx finishes */ - netif_wake_queue(dev); - } - else if (len == 0) { - netif_start_queue(dev); - dev->stats.tx_dropped++; - } - else { - netif_start_queue(dev); - printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len); - } - - spin_unlock_irqrestore(&lp->lock, flags); - - dev_consume_skb_any(skb); - - return NETDEV_TX_OK; -} - -static void uml_net_set_multicast_list(struct net_device *dev) -{ - return; -} - -static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue) -{ - netif_trans_update(dev); - netif_wake_queue(dev); -} - -#ifdef CONFIG_NET_POLL_CONTROLLER -static void uml_net_poll_controller(struct net_device *dev) -{ - disable_irq(dev->irq); - uml_net_interrupt(dev->irq, dev); - enable_irq(dev->irq); -} -#endif - -static void uml_net_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - strscpy(info->driver, DRIVER_NAME); -} - -static const struct ethtool_ops uml_net_ethtool_ops = { - .get_drvinfo = uml_net_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_ts_info = ethtool_op_get_ts_info, -}; - -void uml_net_setup_etheraddr(struct net_device *dev, char *str) -{ - u8 addr[ETH_ALEN]; - char *end; - int i; - - if (str == NULL) - goto random; - - for (i = 0; i < 6; i++) { - addr[i] = simple_strtoul(str, &end, 16); - if ((end == str) || - ((*end != ':') && (*end != ',') && (*end != '\0'))) { - printk(KERN_ERR - "setup_etheraddr: failed to parse '%s' " - "as an ethernet address\n", str); - goto random; - } - str = end + 1; - } - if (is_multicast_ether_addr(addr)) { - printk(KERN_ERR - "Attempt to assign a multicast ethernet address to a " - "device disallowed\n"); - goto random; - } - if (!is_valid_ether_addr(addr)) { - printk(KERN_ERR - "Attempt to assign an invalid ethernet address to a " - "device disallowed\n"); - goto random; - } - if (!is_local_ether_addr(addr)) { - printk(KERN_WARNING - "Warning: Assigning a globally valid ethernet " - "address to a device\n"); - printk(KERN_WARNING "You should set the 2nd rightmost bit in " - "the first byte of the MAC,\n"); - printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n", - addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], - addr[5]); - } - eth_hw_addr_set(dev, addr); - return; - -random: - printk(KERN_INFO - "Choosing a random ethernet address for device %s\n", dev->name); - eth_hw_addr_random(dev); -} - -static DEFINE_SPINLOCK(devices_lock); -static LIST_HEAD(devices); - -static struct platform_driver uml_net_driver = { - .driver = { - .name = DRIVER_NAME, - }, -}; - -static void net_device_release(struct device *dev) -{ - struct uml_net *device = container_of(dev, struct uml_net, pdev.dev); - struct net_device *netdev = device->dev; - struct uml_net_private *lp = netdev_priv(netdev); - - if (lp->remove != NULL) - (*lp->remove)(&lp->user); - list_del(&device->list); - kfree(device); - free_netdev(netdev); -} - -static const struct net_device_ops uml_netdev_ops = { - .ndo_open = uml_net_open, - .ndo_stop = uml_net_close, - .ndo_start_xmit = uml_net_start_xmit, - .ndo_set_rx_mode = uml_net_set_multicast_list, - .ndo_tx_timeout = uml_net_tx_timeout, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = uml_net_poll_controller, -#endif -}; - -/* - * Ensures that platform_driver_register is called only once by - * eth_configure. Will be set in an initcall. - */ -static int driver_registered; - -static void eth_configure(int n, void *init, char *mac, - struct transport *transport, gfp_t gfp_mask) -{ - struct uml_net *device; - struct net_device *dev; - struct uml_net_private *lp; - int err, size; - - size = transport->private_size + sizeof(struct uml_net_private); - - device = kzalloc(sizeof(*device), gfp_mask); - if (device == NULL) { - printk(KERN_ERR "eth_configure failed to allocate struct " - "uml_net\n"); - return; - } - - dev = alloc_etherdev(size); - if (dev == NULL) { - printk(KERN_ERR "eth_configure: failed to allocate struct " - "net_device for eth%d\n", n); - goto out_free_device; - } - - INIT_LIST_HEAD(&device->list); - device->index = n; - - /* If this name ends up conflicting with an existing registered - * netdevice, that is OK, register_netdev{,ice}() will notice this - * and fail. - */ - snprintf(dev->name, sizeof(dev->name), "eth%d", n); - - uml_net_setup_etheraddr(dev, mac); - - printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr); - - lp = netdev_priv(dev); - /* This points to the transport private data. It's still clear, but we - * must memset it to 0 *now*. Let's help the drivers. */ - memset(lp, 0, size); - INIT_WORK(&lp->work, uml_dev_close); - - /* sysfs register */ - if (!driver_registered) { - platform_driver_register(¨_net_driver); - driver_registered = 1; - } - device->pdev.id = n; - device->pdev.name = DRIVER_NAME; - device->pdev.dev.release = net_device_release; - dev_set_drvdata(&device->pdev.dev, device); - if (platform_device_register(&device->pdev)) - goto out_free_netdev; - SET_NETDEV_DEV(dev,&device->pdev.dev); - - device->dev = dev; - - /* - * These just fill in a data structure, so there's no failure - * to be worried about. - */ - (*transport->kern->init)(dev, init); - - *lp = ((struct uml_net_private) - { .list = LIST_HEAD_INIT(lp->list), - .dev = dev, - .fd = -1, - .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, - .max_packet = transport->user->max_packet, - .protocol = transport->kern->protocol, - .open = transport->user->open, - .close = transport->user->close, - .remove = transport->user->remove, - .read = transport->kern->read, - .write = transport->kern->write, - .add_address = transport->user->add_address, - .delete_address = transport->user->delete_address }); - - spin_lock_init(&lp->lock); - memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac)); - - if ((transport->user->init != NULL) && - ((*transport->user->init)(&lp->user, dev) != 0)) - goto out_unregister; - - dev->mtu = transport->user->mtu; - dev->netdev_ops = ¨_netdev_ops; - dev->ethtool_ops = ¨_net_ethtool_ops; - dev->watchdog_timeo = (HZ >> 1); - dev->irq = UM_ETH_IRQ; - - err = update_drop_skb(lp->max_packet); - if (err) - goto out_undo_user_init; - - rtnl_lock(); - err = register_netdevice(dev); - rtnl_unlock(); - if (err) - goto out_undo_user_init; - - spin_lock(&devices_lock); - list_add(&device->list, &devices); - spin_unlock(&devices_lock); - - return; - -out_undo_user_init: - if (transport->user->remove != NULL) - (*transport->user->remove)(&lp->user); -out_unregister: - platform_device_unregister(&device->pdev); - return; /* platform_device_unregister frees dev and device */ -out_free_netdev: - free_netdev(dev); -out_free_device: - kfree(device); -} - -static struct uml_net *find_device(int n) -{ - struct uml_net *device; - struct list_head *ele; - - spin_lock(&devices_lock); - list_for_each(ele, &devices) { - device = list_entry(ele, struct uml_net, list); - if (device->index == n) - goto out; - } - device = NULL; - out: - spin_unlock(&devices_lock); - return device; -} - -static int eth_parse(char *str, int *index_out, char **str_out, - char **error_out) -{ - char *end; - int n, err = -EINVAL; - - n = simple_strtoul(str, &end, 0); - if (end == str) { - *error_out = "Bad device number"; - return err; - } - - str = end; - if (*str != '=') { - *error_out = "Expected '=' after device number"; - return err; - } - - str++; - if (find_device(n)) { - *error_out = "Device already configured"; - return err; - } - - *index_out = n; - *str_out = str; - return 0; -} - -struct eth_init { - struct list_head list; - char *init; - int index; -}; - -static DEFINE_SPINLOCK(transports_lock); -static LIST_HEAD(transports); - -/* Filled in during early boot */ -static LIST_HEAD(eth_cmd_line); - -static int check_transport(struct transport *transport, char *eth, int n, - void **init_out, char **mac_out, gfp_t gfp_mask) -{ - int len; - - len = strlen(transport->name); - if (strncmp(eth, transport->name, len)) - return 0; - - eth += len; - if (*eth == ',') - eth++; - else if (*eth != '\0') - return 0; - - *init_out = kmalloc(transport->setup_size, gfp_mask); - if (*init_out == NULL) - return 1; - - if (!transport->setup(eth, mac_out, *init_out)) { - kfree(*init_out); - *init_out = NULL; - } - return 1; -} - -void register_transport(struct transport *new) -{ - struct list_head *ele, *next; - struct eth_init *eth; - void *init; - char *mac = NULL; - int match; - - spin_lock(&transports_lock); - BUG_ON(!list_empty(&new->list)); - list_add(&new->list, &transports); - spin_unlock(&transports_lock); - - list_for_each_safe(ele, next, ð_cmd_line) { - eth = list_entry(ele, struct eth_init, list); - match = check_transport(new, eth->init, eth->index, &init, - &mac, GFP_KERNEL); - if (!match) - continue; - else if (init != NULL) { - eth_configure(eth->index, init, mac, new, GFP_KERNEL); - kfree(init); - } - list_del(ð->list); - } -} - -static int eth_setup_common(char *str, int index) -{ - struct list_head *ele; - struct transport *transport; - void *init; - char *mac = NULL; - int found = 0; - - spin_lock(&transports_lock); - list_for_each(ele, &transports) { - transport = list_entry(ele, struct transport, list); - if (!check_transport(transport, str, index, &init, - &mac, GFP_ATOMIC)) - continue; - if (init != NULL) { - eth_configure(index, init, mac, transport, GFP_ATOMIC); - kfree(init); - } - found = 1; - break; - } - - spin_unlock(&transports_lock); - return found; -} - -static int __init eth_setup(char *str) -{ - struct eth_init *new; - char *error; - int n, err; - - err = eth_parse(str, &n, &str, &error); - if (err) { - printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n", - str, error); - return 1; - } - - new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES); - - INIT_LIST_HEAD(&new->list); - new->index = n; - new->init = str; - - list_add_tail(&new->list, ð_cmd_line); - return 1; -} - -__setup("eth", eth_setup); -__uml_help(eth_setup, -"eth[0-9]+=<transport>,<options>\n" -" Configure a network device.\n\n" -); - -static int net_config(char *str, char **error_out) -{ - int n, err; - - err = eth_parse(str, &n, &str, error_out); - if (err) - return err; - - /* This string is broken up and the pieces used by the underlying - * driver. So, it is freed only if eth_setup_common fails. - */ - str = kstrdup(str, GFP_KERNEL); - if (str == NULL) { - *error_out = "net_config failed to strdup string"; - return -ENOMEM; - } - err = !eth_setup_common(str, n); - if (err) - kfree(str); - return err; -} - -static int net_id(char **str, int *start_out, int *end_out) -{ - char *end; - int n; - - n = simple_strtoul(*str, &end, 0); - if ((*end != '\0') || (end == *str)) - return -1; - - *start_out = n; - *end_out = n; - *str = end; - return n; -} - -static int net_remove(int n, char **error_out) -{ - struct uml_net *device; - struct net_device *dev; - struct uml_net_private *lp; - - device = find_device(n); - if (device == NULL) - return -ENODEV; - - dev = device->dev; - lp = netdev_priv(dev); - if (lp->fd > 0) - return -EBUSY; - unregister_netdev(dev); - platform_device_unregister(&device->pdev); - - return 0; -} - -static struct mc_device net_mc = { - .list = LIST_HEAD_INIT(net_mc.list), - .name = "eth", - .config = net_config, - .get_config = NULL, - .id = net_id, - .remove = net_remove, -}; - -#ifdef CONFIG_INET -static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct in_ifaddr *ifa = ptr; - struct net_device *dev = ifa->ifa_dev->dev; - struct uml_net_private *lp; - void (*proc)(unsigned char *, unsigned char *, void *); - unsigned char addr_buf[4], netmask_buf[4]; - - if (dev->netdev_ops->ndo_open != uml_net_open) - return NOTIFY_DONE; - - lp = netdev_priv(dev); - - proc = NULL; - switch (event) { - case NETDEV_UP: - proc = lp->add_address; - break; - case NETDEV_DOWN: - proc = lp->delete_address; - break; - } - if (proc != NULL) { - memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf)); - memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf)); - (*proc)(addr_buf, netmask_buf, &lp->user); - } - return NOTIFY_DONE; -} - -/* uml_net_init shouldn't be called twice on two CPUs at the same time */ -static struct notifier_block uml_inetaddr_notifier = { - .notifier_call = uml_inetaddr_event, -}; - -static void inet_register(void) -{ - struct list_head *ele; - struct uml_net_private *lp; - struct in_device *ip; - struct in_ifaddr *in; - - register_inetaddr_notifier(¨_inetaddr_notifier); - - /* Devices may have been opened already, so the uml_inetaddr_notifier - * didn't get a chance to run for them. This fakes it so that - * addresses which have already been set up get handled properly. - */ - spin_lock(&opened_lock); - list_for_each(ele, &opened) { - lp = list_entry(ele, struct uml_net_private, list); - ip = lp->dev->ip_ptr; - if (ip == NULL) - continue; - in = ip->ifa_list; - while (in != NULL) { - uml_inetaddr_event(NULL, NETDEV_UP, in); - in = in->ifa_next; - } - } - spin_unlock(&opened_lock); -} -#else -static inline void inet_register(void) -{ -} -#endif - -static int uml_net_init(void) -{ - mconsole_register_dev(&net_mc); - inet_register(); - return 0; -} - -__initcall(uml_net_init); - -static void close_devices(void) -{ - struct list_head *ele; - struct uml_net_private *lp; - - spin_lock(&opened_lock); - list_for_each(ele, &opened) { - lp = list_entry(ele, struct uml_net_private, list); - um_free_irq(lp->dev->irq, lp->dev); - if ((lp->close != NULL) && (lp->fd >= 0)) - (*lp->close)(lp->fd, &lp->user); - if (lp->remove != NULL) - (*lp->remove)(&lp->user); - } - spin_unlock(&opened_lock); -} - -__uml_exitcall(close_devices); - -void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, - void *), - void *arg) -{ - struct net_device *dev = d; - struct in_device *ip = dev->ip_ptr; - struct in_ifaddr *in; - unsigned char address[4], netmask[4]; - - if (ip == NULL) return; - in = ip->ifa_list; - while (in != NULL) { - memcpy(address, &in->ifa_address, sizeof(address)); - memcpy(netmask, &in->ifa_mask, sizeof(netmask)); - (*cb)(address, netmask, arg); - in = in->ifa_next; - } -} - -int dev_netmask(void *d, void *m) -{ - struct net_device *dev = d; - struct in_device *ip = dev->ip_ptr; - struct in_ifaddr *in; - __be32 *mask_out = m; - - if (ip == NULL) - return 1; - - in = ip->ifa_list; - if (in == NULL) - return 1; - - *mask_out = in->ifa_mask; - return 0; -} - -void *get_output_buffer(int *len_out) -{ - void *ret; - - ret = (void *) __get_free_pages(GFP_KERNEL, 0); - if (ret) *len_out = PAGE_SIZE; - else *len_out = 0; - return ret; -} - -void free_output_buffer(void *buffer) -{ - free_pages((unsigned long) buffer, 0); -} - -int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, - char **gate_addr) -{ - char *remain; - - remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL); - if (remain != NULL) { - printk(KERN_ERR "tap_setup_common - Extra garbage on " - "specification : '%s'\n", remain); - return 1; - } - - return 0; -} - -unsigned short eth_protocol(struct sk_buff *skb) -{ - return eth_type_trans(skb, skb->dev); -} diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c deleted file mode 100644 index 4c9576452ab0..000000000000 --- a/arch/um/drivers/net_user.c +++ /dev/null @@ -1,271 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <stdio.h> -#include <unistd.h> -#include <stdarg.h> -#include <errno.h> -#include <stddef.h> -#include <string.h> -#include <sys/socket.h> -#include <sys/wait.h> -#include <net_user.h> -#include <os.h> -#include <um_malloc.h> - -int tap_open_common(void *dev, char *gate_addr) -{ - int tap_addr[4]; - - if (gate_addr == NULL) - return 0; - if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], - &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) { - printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n", - gate_addr); - return -EINVAL; - } - return 0; -} - -void tap_check_ips(char *gate_addr, unsigned char *eth_addr) -{ - int tap_addr[4]; - - if ((gate_addr != NULL) && - (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], - &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) && - (eth_addr[0] == tap_addr[0]) && - (eth_addr[1] == tap_addr[1]) && - (eth_addr[2] == tap_addr[2]) && - (eth_addr[3] == tap_addr[3])) { - printk(UM_KERN_ERR "The tap IP address and the UML eth IP " - "address must be different\n"); - } -} - -/* Do reliable error handling as this fails frequently enough. */ -void read_output(int fd, char *output, int len) -{ - int remain, ret, expected; - char c; - char *str; - - if (output == NULL) { - output = &c; - len = sizeof(c); - } - - *output = '\0'; - ret = read(fd, &remain, sizeof(remain)); - - if (ret != sizeof(remain)) { - if (ret < 0) - ret = -errno; - expected = sizeof(remain); - str = "length"; - goto err; - } - - while (remain != 0) { - expected = (remain < len) ? remain : len; - ret = read(fd, output, expected); - if (ret != expected) { - if (ret < 0) - ret = -errno; - str = "data"; - goto err; - } - remain -= ret; - } - - return; - -err: - if (ret < 0) - printk(UM_KERN_ERR "read_output - read of %s failed, " - "errno = %d\n", str, -ret); - else - printk(UM_KERN_ERR "read_output - read of %s failed, read only " - "%d of %d bytes\n", str, ret, expected); -} - -int net_read(int fd, void *buf, int len) -{ - int n; - - n = read(fd, buf, len); - - if ((n < 0) && (errno == EAGAIN)) - return 0; - else if (n == 0) - return -ENOTCONN; - return n; -} - -int net_recvfrom(int fd, void *buf, int len) -{ - int n; - - CATCH_EINTR(n = recvfrom(fd, buf, len, 0, NULL, NULL)); - if (n < 0) { - if (errno == EAGAIN) - return 0; - return -errno; - } - else if (n == 0) - return -ENOTCONN; - return n; -} - -int net_write(int fd, void *buf, int len) -{ - int n; - - n = write(fd, buf, len); - - if ((n < 0) && (errno == EAGAIN)) - return 0; - else if (n == 0) - return -ENOTCONN; - return n; -} - -int net_send(int fd, void *buf, int len) -{ - int n; - - CATCH_EINTR(n = send(fd, buf, len, 0)); - if (n < 0) { - if (errno == EAGAIN) - return 0; - return -errno; - } - else if (n == 0) - return -ENOTCONN; - return n; -} - -int net_sendto(int fd, void *buf, int len, void *to, int sock_len) -{ - int n; - - CATCH_EINTR(n = sendto(fd, buf, len, 0, (struct sockaddr *) to, - sock_len)); - if (n < 0) { - if (errno == EAGAIN) - return 0; - return -errno; - } - else if (n == 0) - return -ENOTCONN; - return n; -} - -struct change_pre_exec_data { - int close_me; - int stdout_fd; -}; - -static void change_pre_exec(void *arg) -{ - struct change_pre_exec_data *data = arg; - - close(data->close_me); - dup2(data->stdout_fd, 1); -} - -static int change_tramp(char **argv, char *output, int output_len) -{ - int pid, fds[2], err; - struct change_pre_exec_data pe_data; - - err = os_pipe(fds, 1, 0); - if (err < 0) { - printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n", - -err); - return err; - } - pe_data.close_me = fds[0]; - pe_data.stdout_fd = fds[1]; - pid = run_helper(change_pre_exec, &pe_data, argv); - - if (pid > 0) /* Avoid hang as we won't get data in failure case. */ - read_output(fds[0], output, output_len); - - close(fds[0]); - close(fds[1]); - - if (pid > 0) - helper_wait(pid); - return pid; -} - -static void change(char *dev, char *what, unsigned char *addr, - unsigned char *netmask) -{ - char addr_buf[sizeof("255.255.255.255\0")]; - char netmask_buf[sizeof("255.255.255.255\0")]; - char version[sizeof("nnnnn\0")]; - char *argv[] = { "uml_net", version, what, dev, addr_buf, - netmask_buf, NULL }; - char *output; - int output_len, pid; - - sprintf(version, "%d", UML_NET_VERSION); - sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); - sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], - netmask[2], netmask[3]); - - output_len = UM_KERN_PAGE_SIZE; - output = uml_kmalloc(output_len, UM_GFP_KERNEL); - if (output == NULL) - printk(UM_KERN_ERR "change : failed to allocate output " - "buffer\n"); - - pid = change_tramp(argv, output, output_len); - if (pid < 0) { - kfree(output); - return; - } - - if (output != NULL) { - printk("%s", output); - kfree(output); - } -} - -void open_addr(unsigned char *addr, unsigned char *netmask, void *arg) -{ - change(arg, "add", addr, netmask); -} - -void close_addr(unsigned char *addr, unsigned char *netmask, void *arg) -{ - change(arg, "del", addr, netmask); -} - -char *split_if_spec(char *str, ...) -{ - char **arg, *end, *ret = NULL; - va_list ap; - - va_start(ap, str); - while ((arg = va_arg(ap, char **)) != NULL) { - if (*str == '\0') - goto out; - end = strchr(str, ','); - if (end != str) - *arg = str; - if (end == NULL) - goto out; - *end++ = '\0'; - str = end; - } - ret = str; -out: - va_end(ap); - return ret; -} diff --git a/arch/um/drivers/slip.h b/arch/um/drivers/slip.h deleted file mode 100644 index 0f3b7ca99465..000000000000 --- a/arch/um/drivers/slip.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_SLIP_H -#define __UM_SLIP_H - -#include "slip_common.h" - -struct slip_data { - void *dev; - char name[sizeof("slnnnnn\0")]; - char *addr; - char *gate_addr; - int slave; - struct slip_proto slip; -}; - -extern const struct net_user_info slip_user_info; - -extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); -extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); - -#endif diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c deleted file mode 100644 index 20fe4f42743d..000000000000 --- a/arch/um/drivers/slip_common.c +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <string.h> -#include "slip_common.h" -#include <net_user.h> - -int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip) -{ - int i, n, size, start; - - if(slip->more > 0){ - i = 0; - while(i < slip->more){ - size = slip_unesc(slip->ibuf[i++], slip->ibuf, - &slip->pos, &slip->esc); - if(size){ - memcpy(buf, slip->ibuf, size); - memmove(slip->ibuf, &slip->ibuf[i], - slip->more - i); - slip->more = slip->more - i; - return size; - } - } - slip->more = 0; - } - - n = net_read(fd, &slip->ibuf[slip->pos], - sizeof(slip->ibuf) - slip->pos); - if(n <= 0) - return n; - - start = slip->pos; - for(i = 0; i < n; i++){ - size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos, - &slip->esc); - if(size){ - memcpy(buf, slip->ibuf, size); - memmove(slip->ibuf, &slip->ibuf[start+i+1], - n - (i + 1)); - slip->more = n - (i + 1); - return size; - } - } - return 0; -} - -int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip) -{ - int actual, n; - - actual = slip_esc(buf, slip->obuf, len); - n = net_write(fd, slip->obuf, actual); - if(n < 0) - return n; - else return len; -} diff --git a/arch/um/drivers/slip_common.h b/arch/um/drivers/slip_common.h deleted file mode 100644 index d3798b5caf7f..000000000000 --- a/arch/um/drivers/slip_common.h +++ /dev/null @@ -1,106 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_SLIP_COMMON_H -#define __UM_SLIP_COMMON_H - -#define BUF_SIZE 1500 - /* two bytes each for a (pathological) max packet of escaped chars + * - * terminating END char + initial END char */ -#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) - -/* SLIP protocol characters. */ -#define SLIP_END 0300 /* indicates end of frame */ -#define SLIP_ESC 0333 /* indicates byte stuffing */ -#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */ -#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ - -static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos, - int *esc) -{ - int ret; - - switch(c){ - case SLIP_END: - *esc = 0; - ret=*pos; - *pos=0; - return(ret); - case SLIP_ESC: - *esc = 1; - return(0); - case SLIP_ESC_ESC: - if(*esc){ - *esc = 0; - c = SLIP_ESC; - } - break; - case SLIP_ESC_END: - if(*esc){ - *esc = 0; - c = SLIP_END; - } - break; - } - buf[(*pos)++] = c; - return(0); -} - -static inline int slip_esc(unsigned char *s, unsigned char *d, int len) -{ - unsigned char *ptr = d; - unsigned char c; - - /* - * Send an initial END character to flush out any - * data that may have accumulated in the receiver - * due to line noise. - */ - - *ptr++ = SLIP_END; - - /* - * For each byte in the packet, send the appropriate - * character sequence, according to the SLIP protocol. - */ - - while (len-- > 0) { - switch(c = *s++) { - case SLIP_END: - *ptr++ = SLIP_ESC; - *ptr++ = SLIP_ESC_END; - break; - case SLIP_ESC: - *ptr++ = SLIP_ESC; - *ptr++ = SLIP_ESC_ESC; - break; - default: - *ptr++ = c; - break; - } - } - *ptr++ = SLIP_END; - return (ptr - d); -} - -struct slip_proto { - unsigned char ibuf[ENC_BUF_SIZE]; - unsigned char obuf[ENC_BUF_SIZE]; - int more; /* more data: do not read fd until ibuf has been drained */ - int pos; - int esc; -}; - -static inline void slip_proto_init(struct slip_proto * slip) -{ - memset(slip->ibuf, 0, sizeof(slip->ibuf)); - memset(slip->obuf, 0, sizeof(slip->obuf)); - slip->more = 0; - slip->pos = 0; - slip->esc = 0; -} - -extern int slip_proto_read(int fd, void *buf, int len, - struct slip_proto *slip); -extern int slip_proto_write(int fd, void *buf, int len, - struct slip_proto *slip); - -#endif diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c deleted file mode 100644 index c58ccdcc16d6..000000000000 --- a/arch/um/drivers/slip_kern.c +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <linux/if_arp.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include "slip.h" - -struct slip_init { - char *gate_addr; -}; - -static void slip_init(struct net_device *dev, void *data) -{ - struct uml_net_private *private; - struct slip_data *spri; - struct slip_init *init = data; - - private = netdev_priv(dev); - spri = (struct slip_data *) private->user; - - memset(spri->name, 0, sizeof(spri->name)); - spri->addr = NULL; - spri->gate_addr = init->gate_addr; - spri->slave = -1; - spri->dev = dev; - - slip_proto_init(&spri->slip); - - dev->hard_header_len = 0; - dev->header_ops = NULL; - dev->addr_len = 0; - dev->type = ARPHRD_SLIP; - dev->tx_queue_len = 256; - dev->flags = IFF_NOARP; - printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); -} - -static unsigned short slip_protocol(struct sk_buff *skbuff) -{ - return htons(ETH_P_IP); -} - -static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu, - (struct slip_data *) &lp->user); -} - -static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return slip_user_write(fd, skb->data, skb->len, - (struct slip_data *) &lp->user); -} - -static const struct net_kern_info slip_kern_info = { - .init = slip_init, - .protocol = slip_protocol, - .read = slip_read, - .write = slip_write, -}; - -static int slip_setup(char *str, char **mac_out, void *data) -{ - struct slip_init *init = data; - - *init = ((struct slip_init) { .gate_addr = NULL }); - - if (str[0] != '\0') - init->gate_addr = str; - return 1; -} - -static struct transport slip_transport = { - .list = LIST_HEAD_INIT(slip_transport.list), - .name = "slip", - .setup = slip_setup, - .user = &slip_user_info, - .kern = &slip_kern_info, - .private_size = sizeof(struct slip_data), - .setup_size = sizeof(struct slip_init), -}; - -static int register_slip(void) -{ - register_transport(&slip_transport); - return 0; -} - -late_initcall(register_slip); diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c deleted file mode 100644 index 7334019c9e60..000000000000 --- a/arch/um/drivers/slip_user.c +++ /dev/null @@ -1,252 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <fcntl.h> -#include <string.h> -#include <termios.h> -#include <sys/wait.h> -#include <net_user.h> -#include <os.h> -#include "slip.h" -#include <um_malloc.h> - -static int slip_user_init(void *data, void *dev) -{ - struct slip_data *pri = data; - - pri->dev = dev; - return 0; -} - -static int set_up_tty(int fd) -{ - int i; - struct termios tios; - - if (tcgetattr(fd, &tios) < 0) { - printk(UM_KERN_ERR "could not get initial terminal " - "attributes\n"); - return -1; - } - - tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL; - tios.c_iflag = IGNBRK | IGNPAR; - tios.c_oflag = 0; - tios.c_lflag = 0; - for (i = 0; i < NCCS; i++) - tios.c_cc[i] = 0; - tios.c_cc[VMIN] = 1; - tios.c_cc[VTIME] = 0; - - cfsetospeed(&tios, B38400); - cfsetispeed(&tios, B38400); - - if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) { - printk(UM_KERN_ERR "failed to set terminal attributes\n"); - return -1; - } - return 0; -} - -struct slip_pre_exec_data { - int stdin_fd; - int stdout_fd; - int close_me; -}; - -static void slip_pre_exec(void *arg) -{ - struct slip_pre_exec_data *data = arg; - - if (data->stdin_fd >= 0) - dup2(data->stdin_fd, 0); - dup2(data->stdout_fd, 1); - if (data->close_me >= 0) - close(data->close_me); -} - -static int slip_tramp(char **argv, int fd) -{ - struct slip_pre_exec_data pe_data; - char *output; - int pid, fds[2], err, output_len; - - err = os_pipe(fds, 1, 0); - if (err < 0) { - printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n", - -err); - goto out; - } - - err = 0; - pe_data.stdin_fd = fd; - pe_data.stdout_fd = fds[1]; - pe_data.close_me = fds[0]; - err = run_helper(slip_pre_exec, &pe_data, argv); - if (err < 0) - goto out_close; - pid = err; - - output_len = UM_KERN_PAGE_SIZE; - output = uml_kmalloc(output_len, UM_GFP_KERNEL); - if (output == NULL) { - printk(UM_KERN_ERR "slip_tramp : failed to allocate output " - "buffer\n"); - os_kill_process(pid, 1); - err = -ENOMEM; - goto out_close; - } - - close(fds[1]); - read_output(fds[0], output, output_len); - printk("%s", output); - - err = helper_wait(pid); - close(fds[0]); - - kfree(output); - return err; - -out_close: - close(fds[0]); - close(fds[1]); -out: - return err; -} - -static int slip_open(void *data) -{ - struct slip_data *pri = data; - char version_buf[sizeof("nnnnn\0")]; - char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; - char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, - NULL }; - int sfd, mfd, err; - - err = get_pty(); - if (err < 0) { - printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n", - -err); - goto out; - } - mfd = err; - - err = open(ptsname(mfd), O_RDWR, 0); - if (err < 0) { - printk(UM_KERN_ERR "Couldn't open tty for slip line, " - "err = %d\n", -err); - goto out_close; - } - sfd = err; - - err = set_up_tty(sfd); - if (err) - goto out_close2; - - pri->slave = sfd; - pri->slip.pos = 0; - pri->slip.esc = 0; - if (pri->gate_addr != NULL) { - sprintf(version_buf, "%d", UML_NET_VERSION); - strcpy(gate_buf, pri->gate_addr); - - err = slip_tramp(argv, sfd); - - if (err < 0) { - printk(UM_KERN_ERR "slip_tramp failed - err = %d\n", - -err); - goto out_close2; - } - err = os_get_ifname(pri->slave, pri->name); - if (err < 0) { - printk(UM_KERN_ERR "get_ifname failed, err = %d\n", - -err); - goto out_close2; - } - iter_addresses(pri->dev, open_addr, pri->name); - } - else { - err = os_set_slip(sfd); - if (err < 0) { - printk(UM_KERN_ERR "Failed to set slip discipline " - "encapsulation - err = %d\n", -err); - goto out_close2; - } - } - return mfd; -out_close2: - close(sfd); -out_close: - close(mfd); -out: - return err; -} - -static void slip_close(int fd, void *data) -{ - struct slip_data *pri = data; - char version_buf[sizeof("nnnnn\0")]; - char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, - NULL }; - int err; - - if (pri->gate_addr != NULL) - iter_addresses(pri->dev, close_addr, pri->name); - - sprintf(version_buf, "%d", UML_NET_VERSION); - - err = slip_tramp(argv, pri->slave); - - if (err != 0) - printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err); - close(fd); - close(pri->slave); - pri->slave = -1; -} - -int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) -{ - return slip_proto_read(fd, buf, len, &pri->slip); -} - -int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) -{ - return slip_proto_write(fd, buf, len, &pri->slip); -} - -static void slip_add_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct slip_data *pri = data; - - if (pri->slave < 0) - return; - open_addr(addr, netmask, pri->name); -} - -static void slip_del_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct slip_data *pri = data; - - if (pri->slave < 0) - return; - close_addr(addr, netmask, pri->name); -} - -const struct net_user_info slip_user_info = { - .init = slip_user_init, - .open = slip_open, - .close = slip_close, - .remove = NULL, - .add_address = slip_add_addr, - .delete_address = slip_del_addr, - .mtu = BUF_SIZE, - .max_packet = BUF_SIZE, -}; diff --git a/arch/um/drivers/slirp.h b/arch/um/drivers/slirp.h deleted file mode 100644 index 4aef2b88249a..000000000000 --- a/arch/um/drivers/slirp.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_SLIRP_H -#define __UM_SLIRP_H - -#include "slip_common.h" - -#define SLIRP_MAX_ARGS 100 -/* - * XXX this next definition is here because I don't understand why this - * initializer doesn't work in slirp_kern.c: - * - * argv : { init->argv[ 0 ... SLIRP_MAX_ARGS-1 ] }, - * - * or why I can't typecast like this: - * - * argv : (char* [SLIRP_MAX_ARGS])(init->argv), - */ -struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; }; - -struct slirp_data { - void *dev; - struct arg_list_dummy_wrapper argw; - int pid; - int slave; - struct slip_proto slip; -}; - -extern const struct net_user_info slirp_user_info; - -extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri); -extern int slirp_user_write(int fd, void *buf, int len, - struct slirp_data *pri); - -#endif diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c deleted file mode 100644 index 0a6151ee9572..000000000000 --- a/arch/um/drivers/slirp_kern.c +++ /dev/null @@ -1,120 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <linux/if_arp.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <linux/string.h> -#include <net_kern.h> -#include <net_user.h> -#include "slirp.h" - -struct slirp_init { - struct arg_list_dummy_wrapper argw; /* XXX should be simpler... */ -}; - -static void slirp_init(struct net_device *dev, void *data) -{ - struct uml_net_private *private; - struct slirp_data *spri; - struct slirp_init *init = data; - int i; - - private = netdev_priv(dev); - spri = (struct slirp_data *) private->user; - - spri->argw = init->argw; - spri->pid = -1; - spri->slave = -1; - spri->dev = dev; - - slip_proto_init(&spri->slip); - - dev->hard_header_len = 0; - dev->header_ops = NULL; - dev->addr_len = 0; - dev->type = ARPHRD_SLIP; - dev->tx_queue_len = 256; - dev->flags = IFF_NOARP; - printk("SLIRP backend - command line:"); - for (i = 0; spri->argw.argv[i] != NULL; i++) - printk(" '%s'",spri->argw.argv[i]); - printk("\n"); -} - -static unsigned short slirp_protocol(struct sk_buff *skbuff) -{ - return htons(ETH_P_IP); -} - -static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu, - (struct slirp_data *) &lp->user); -} - -static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return slirp_user_write(fd, skb->data, skb->len, - (struct slirp_data *) &lp->user); -} - -const struct net_kern_info slirp_kern_info = { - .init = slirp_init, - .protocol = slirp_protocol, - .read = slirp_read, - .write = slirp_write, -}; - -static int slirp_setup(char *str, char **mac_out, void *data) -{ - struct slirp_init *init = data; - int i=0; - - *init = ((struct slirp_init) { .argw = { { "slirp", NULL } } }); - - str = split_if_spec(str, mac_out, NULL); - - if (str == NULL) /* no command line given after MAC addr */ - return 1; - - do { - if (i >= SLIRP_MAX_ARGS - 1) { - printk(KERN_WARNING "slirp_setup: truncating slirp " - "arguments\n"); - break; - } - init->argw.argv[i++] = str; - while(*str && *str!=',') { - if (*str == '_') - *str=' '; - str++; - } - if (*str != ',') - break; - *str++ = '\0'; - } while (1); - - init->argw.argv[i] = NULL; - return 1; -} - -static struct transport slirp_transport = { - .list = LIST_HEAD_INIT(slirp_transport.list), - .name = "slirp", - .setup = slirp_setup, - .user = &slirp_user_info, - .kern = &slirp_kern_info, - .private_size = sizeof(struct slirp_data), - .setup_size = sizeof(struct slirp_init), -}; - -static int register_slirp(void) -{ - register_transport(&slirp_transport); - return 0; -} - -late_initcall(register_slirp); diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c deleted file mode 100644 index 97228aa080cb..000000000000 --- a/arch/um/drivers/slirp_user.c +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <sys/wait.h> -#include <net_user.h> -#include <os.h> -#include "slirp.h" - -static int slirp_user_init(void *data, void *dev) -{ - struct slirp_data *pri = data; - - pri->dev = dev; - return 0; -} - -struct slirp_pre_exec_data { - int stdin_fd; - int stdout_fd; -}; - -static void slirp_pre_exec(void *arg) -{ - struct slirp_pre_exec_data *data = arg; - - if (data->stdin_fd != -1) - dup2(data->stdin_fd, 0); - if (data->stdout_fd != -1) - dup2(data->stdout_fd, 1); -} - -static int slirp_tramp(char **argv, int fd) -{ - struct slirp_pre_exec_data pe_data; - int pid; - - pe_data.stdin_fd = fd; - pe_data.stdout_fd = fd; - pid = run_helper(slirp_pre_exec, &pe_data, argv); - - return pid; -} - -static int slirp_open(void *data) -{ - struct slirp_data *pri = data; - int fds[2], err; - - err = os_pipe(fds, 1, 1); - if (err) - return err; - - err = slirp_tramp(pri->argw.argv, fds[1]); - if (err < 0) { - printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err); - goto out; - } - - pri->slave = fds[1]; - pri->slip.pos = 0; - pri->slip.esc = 0; - pri->pid = err; - - return fds[0]; -out: - close(fds[0]); - close(fds[1]); - return err; -} - -static void slirp_close(int fd, void *data) -{ - struct slirp_data *pri = data; - int err; - - close(fd); - close(pri->slave); - - pri->slave = -1; - - if (pri->pid<1) { - printk(UM_KERN_ERR "slirp_close: no child process to shut " - "down\n"); - return; - } - -#if 0 - if (kill(pri->pid, SIGHUP)<0) { - printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed " - "(%d)\n", pri->pid, errno); - } -#endif - err = helper_wait(pri->pid); - if (err < 0) - return; - - pri->pid = -1; -} - -int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri) -{ - return slip_proto_read(fd, buf, len, &pri->slip); -} - -int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri) -{ - return slip_proto_write(fd, buf, len, &pri->slip); -} - -const struct net_user_info slirp_user_info = { - .init = slirp_user_init, - .open = slirp_open, - .close = slirp_close, - .remove = NULL, - .add_address = NULL, - .delete_address = NULL, - .mtu = BUF_SIZE, - .max_packet = BUF_SIZE, -}; diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index c5e6545f6fcf..8e8a8bf518b6 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -41,7 +41,7 @@ int start_io_thread(struct os_helper_thread **td_out, int *fd_out) *fd_out = fds[1]; err = os_set_fd_block(*fd_out, 0); - err = os_set_fd_block(kernel_fd, 0); + err |= os_set_fd_block(kernel_fd, 0); if (err) { printk("start_io_thread - failed to set nonblocking I/O.\n"); goto out_close; diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h deleted file mode 100644 index fe39bee1e3bd..000000000000 --- a/arch/um/drivers/umcast.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __DRIVERS_UMCAST_H -#define __DRIVERS_UMCAST_H - -#include <net_user.h> - -struct umcast_data { - char *addr; - unsigned short lport; - unsigned short rport; - void *listen_addr; - void *remote_addr; - int ttl; - int unicast; - void *dev; -}; - -extern const struct net_user_info umcast_user_info; - -extern int umcast_user_write(int fd, void *buf, int len, - struct umcast_data *pri); - -#endif diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c deleted file mode 100644 index 595a54f2b9c6..000000000000 --- a/arch/um/drivers/umcast_kern.c +++ /dev/null @@ -1,188 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * user-mode-linux networking multicast transport - * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * - * based on the existing uml-networking code, which is - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - * - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include "umcast.h" -#include <net_kern.h> - -struct umcast_init { - char *addr; - int lport; - int rport; - int ttl; - bool unicast; -}; - -static void umcast_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct umcast_data *dpri; - struct umcast_init *init = data; - - pri = netdev_priv(dev); - dpri = (struct umcast_data *) pri->user; - dpri->addr = init->addr; - dpri->lport = init->lport; - dpri->rport = init->rport; - dpri->unicast = init->unicast; - dpri->ttl = init->ttl; - dpri->dev = dev; - - if (dpri->unicast) { - printk(KERN_INFO "ucast backend address: %s:%u listen port: " - "%u\n", dpri->addr, dpri->rport, dpri->lport); - } else { - printk(KERN_INFO "mcast backend multicast address: %s:%u, " - "TTL:%u\n", dpri->addr, dpri->lport, dpri->ttl); - } -} - -static int umcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_recvfrom(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); -} - -static int umcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return umcast_user_write(fd, skb->data, skb->len, - (struct umcast_data *) &lp->user); -} - -static const struct net_kern_info umcast_kern_info = { - .init = umcast_init, - .protocol = eth_protocol, - .read = umcast_read, - .write = umcast_write, -}; - -static int mcast_setup(char *str, char **mac_out, void *data) -{ - struct umcast_init *init = data; - char *port_str = NULL, *ttl_str = NULL, *remain; - char *last; - - *init = ((struct umcast_init) - { .addr = "239.192.168.1", - .lport = 1102, - .ttl = 1 }); - - remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str, - NULL); - if (remain != NULL) { - printk(KERN_ERR "mcast_setup - Extra garbage on " - "specification : '%s'\n", remain); - return 0; - } - - if (port_str != NULL) { - init->lport = simple_strtoul(port_str, &last, 10); - if ((*last != '\0') || (last == port_str)) { - printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", - port_str); - return 0; - } - } - - if (ttl_str != NULL) { - init->ttl = simple_strtoul(ttl_str, &last, 10); - if ((*last != '\0') || (last == ttl_str)) { - printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n", - ttl_str); - return 0; - } - } - - init->unicast = false; - init->rport = init->lport; - - printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr, - init->lport, init->ttl); - - return 1; -} - -static int ucast_setup(char *str, char **mac_out, void *data) -{ - struct umcast_init *init = data; - char *lport_str = NULL, *rport_str = NULL, *remain; - char *last; - - *init = ((struct umcast_init) - { .addr = "", - .lport = 1102, - .rport = 1102 }); - - remain = split_if_spec(str, mac_out, &init->addr, - &lport_str, &rport_str, NULL); - if (remain != NULL) { - printk(KERN_ERR "ucast_setup - Extra garbage on " - "specification : '%s'\n", remain); - return 0; - } - - if (lport_str != NULL) { - init->lport = simple_strtoul(lport_str, &last, 10); - if ((*last != '\0') || (last == lport_str)) { - printk(KERN_ERR "ucast_setup - Bad listen port : " - "'%s'\n", lport_str); - return 0; - } - } - - if (rport_str != NULL) { - init->rport = simple_strtoul(rport_str, &last, 10); - if ((*last != '\0') || (last == rport_str)) { - printk(KERN_ERR "ucast_setup - Bad remote port : " - "'%s'\n", rport_str); - return 0; - } - } - - init->unicast = true; - - printk(KERN_INFO "Configured ucast device: :%u -> %s:%u\n", - init->lport, init->addr, init->rport); - - return 1; -} - -static struct transport mcast_transport = { - .list = LIST_HEAD_INIT(mcast_transport.list), - .name = "mcast", - .setup = mcast_setup, - .user = &umcast_user_info, - .kern = &umcast_kern_info, - .private_size = sizeof(struct umcast_data), - .setup_size = sizeof(struct umcast_init), -}; - -static struct transport ucast_transport = { - .list = LIST_HEAD_INIT(ucast_transport.list), - .name = "ucast", - .setup = ucast_setup, - .user = &umcast_user_info, - .kern = &umcast_kern_info, - .private_size = sizeof(struct umcast_data), - .setup_size = sizeof(struct umcast_init), -}; - -static int register_umcast(void) -{ - register_transport(&mcast_transport); - register_transport(&ucast_transport); - return 0; -} - -late_initcall(register_umcast); diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c deleted file mode 100644 index b50b13cff04e..000000000000 --- a/arch/um/drivers/umcast_user.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * user-mode-linux networking multicast transport - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> - * - * based on the existing uml-networking code, which is - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - * - * - */ - -#include <unistd.h> -#include <errno.h> -#include <netinet/in.h> -#include "umcast.h" -#include <net_user.h> -#include <um_malloc.h> - -static struct sockaddr_in *new_addr(char *addr, unsigned short port) -{ - struct sockaddr_in *sin; - - sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL); - if (sin == NULL) { - printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in " - "failed\n"); - return NULL; - } - sin->sin_family = AF_INET; - if (addr) - sin->sin_addr.s_addr = in_aton(addr); - else - sin->sin_addr.s_addr = INADDR_ANY; - sin->sin_port = htons(port); - return sin; -} - -static int umcast_user_init(void *data, void *dev) -{ - struct umcast_data *pri = data; - - pri->remote_addr = new_addr(pri->addr, pri->rport); - if (pri->unicast) - pri->listen_addr = new_addr(NULL, pri->lport); - else - pri->listen_addr = pri->remote_addr; - pri->dev = dev; - return 0; -} - -static void umcast_remove(void *data) -{ - struct umcast_data *pri = data; - - kfree(pri->listen_addr); - if (pri->unicast) - kfree(pri->remote_addr); - pri->listen_addr = pri->remote_addr = NULL; -} - -static int umcast_open(void *data) -{ - struct umcast_data *pri = data; - struct sockaddr_in *lsin = pri->listen_addr; - struct sockaddr_in *rsin = pri->remote_addr; - struct ip_mreq mreq; - int fd, yes = 1, err = -EINVAL; - - - if ((!pri->unicast && lsin->sin_addr.s_addr == 0) || - (rsin->sin_addr.s_addr == 0) || - (lsin->sin_port == 0) || (rsin->sin_port == 0)) - goto out; - - fd = socket(AF_INET, SOCK_DGRAM, 0); - - if (fd < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open : data socket failed, " - "errno = %d\n", errno); - goto out; - } - - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open: SO_REUSEADDR failed, " - "errno = %d\n", errno); - goto out_close; - } - - if (!pri->unicast) { - /* set ttl according to config */ - if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl, - sizeof(pri->ttl)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_TTL " - "failed, error = %d\n", errno); - goto out_close; - } - - /* set LOOP, so data does get fed back to local sockets */ - if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, - &yes, sizeof(yes)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_LOOP " - "failed, error = %d\n", errno); - goto out_close; - } - } - - /* bind socket to the address */ - if (bind(fd, (struct sockaddr *) lsin, sizeof(*lsin)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open : data bind failed, " - "errno = %d\n", errno); - goto out_close; - } - - if (!pri->unicast) { - /* subscribe to the multicast group */ - mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr; - mreq.imr_interface.s_addr = 0; - if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, - &mreq, sizeof(mreq)) < 0) { - err = -errno; - printk(UM_KERN_ERR "umcast_open: IP_ADD_MEMBERSHIP " - "failed, error = %d\n", errno); - printk(UM_KERN_ERR "There appears not to be a " - "multicast-capable network interface on the " - "host.\n"); - printk(UM_KERN_ERR "eth0 should be configured in order " - "to use the multicast transport.\n"); - goto out_close; - } - } - - return fd; - - out_close: - close(fd); - out: - return err; -} - -static void umcast_close(int fd, void *data) -{ - struct umcast_data *pri = data; - - if (!pri->unicast) { - struct ip_mreq mreq; - struct sockaddr_in *lsin = pri->listen_addr; - - mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr; - mreq.imr_interface.s_addr = 0; - if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP, - &mreq, sizeof(mreq)) < 0) { - printk(UM_KERN_ERR "umcast_close: IP_DROP_MEMBERSHIP " - "failed, error = %d\n", errno); - } - } - - close(fd); -} - -int umcast_user_write(int fd, void *buf, int len, struct umcast_data *pri) -{ - struct sockaddr_in *data_addr = pri->remote_addr; - - return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)); -} - -const struct net_user_info umcast_user_info = { - .init = umcast_user_init, - .open = umcast_open, - .close = umcast_close, - .remove = umcast_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/drivers/vde.h b/arch/um/drivers/vde.h deleted file mode 100644 index cab0379e6142..000000000000 --- a/arch/um/drivers/vde.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). - */ - -#ifndef __UM_VDE_H__ -#define __UM_VDE_H__ - -struct vde_data { - char *vde_switch; - char *descr; - void *args; - void *conn; - void *dev; -}; - -struct vde_init { - char *vde_switch; - char *descr; - int port; - char *group; - int mode; -}; - -extern const struct net_user_info vde_user_info; - -extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init); - -extern int vde_user_read(void *conn, void *buf, int len); -extern int vde_user_write(void *conn, void *buf, int len); - -#endif diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c deleted file mode 100644 index bc6f22cbfb35..000000000000 --- a/arch/um/drivers/vde_kern.c +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). - * - * Transport usage: - * ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description> - * - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include <net_kern.h> -#include <net_user.h> -#include "vde.h" - -static void vde_init(struct net_device *dev, void *data) -{ - struct vde_init *init = data; - struct uml_net_private *pri; - struct vde_data *vpri; - - pri = netdev_priv(dev); - vpri = (struct vde_data *) pri->user; - - vpri->vde_switch = init->vde_switch; - vpri->descr = init->descr ? init->descr : "UML vde_transport"; - vpri->args = NULL; - vpri->conn = NULL; - vpri->dev = dev; - - printk("vde backend - %s, ", vpri->vde_switch ? - vpri->vde_switch : "(default socket)"); - - vde_init_libstuff(vpri, init); - - printk("\n"); -} - -static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - struct vde_data *pri = (struct vde_data *) &lp->user; - - if (pri->conn != NULL) - return vde_user_read(pri->conn, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); - - printk(KERN_ERR "vde_read - we have no VDECONN to read from"); - return -EBADF; -} - -static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - struct vde_data *pri = (struct vde_data *) &lp->user; - - if (pri->conn != NULL) - return vde_user_write((void *)pri->conn, skb->data, - skb->len); - - printk(KERN_ERR "vde_write - we have no VDECONN to write to"); - return -EBADF; -} - -static const struct net_kern_info vde_kern_info = { - .init = vde_init, - .protocol = eth_protocol, - .read = vde_read, - .write = vde_write, -}; - -static int vde_setup(char *str, char **mac_out, void *data) -{ - struct vde_init *init = data; - char *remain, *port_str = NULL, *mode_str = NULL, *last; - - *init = ((struct vde_init) - { .vde_switch = NULL, - .descr = NULL, - .port = 0, - .group = NULL, - .mode = 0 }); - - remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str, - &init->group, &mode_str, &init->descr, NULL); - - if (remain != NULL) - printk(KERN_WARNING "vde_setup - Ignoring extra data :" - "'%s'\n", remain); - - if (port_str != NULL) { - init->port = simple_strtoul(port_str, &last, 10); - if ((*last != '\0') || (last == port_str)) { - printk(KERN_ERR "vde_setup - Bad port : '%s'\n", - port_str); - return 0; - } - } - - if (mode_str != NULL) { - init->mode = simple_strtoul(mode_str, &last, 8); - if ((*last != '\0') || (last == mode_str)) { - printk(KERN_ERR "vde_setup - Bad mode : '%s'\n", - mode_str); - return 0; - } - } - - printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ? - init->vde_switch : "(default socket)"); - - return 1; -} - -static struct transport vde_transport = { - .list = LIST_HEAD_INIT(vde_transport.list), - .name = "vde", - .setup = vde_setup, - .user = &vde_user_info, - .kern = &vde_kern_info, - .private_size = sizeof(struct vde_data), - .setup_size = sizeof(struct vde_init), -}; - -static int register_vde(void) -{ - register_transport(&vde_transport); - return 0; -} - -late_initcall(register_vde); diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c deleted file mode 100644 index bc7dc4e1e486..000000000000 --- a/arch/um/drivers/vde_user.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). - */ - -#include <stddef.h> -#include <errno.h> -#include <libvdeplug.h> -#include <net_user.h> -#include <um_malloc.h> -#include "vde.h" - -static int vde_user_init(void *data, void *dev) -{ - struct vde_data *pri = data; - VDECONN *conn = NULL; - int err = -EINVAL; - - pri->dev = dev; - - conn = vde_open(pri->vde_switch, pri->descr, pri->args); - - if (conn == NULL) { - err = -errno; - printk(UM_KERN_ERR "vde_user_init: vde_open failed, " - "errno = %d\n", errno); - return err; - } - - printk(UM_KERN_INFO "vde backend - connection opened\n"); - - pri->conn = conn; - - return 0; -} - -static int vde_user_open(void *data) -{ - struct vde_data *pri = data; - - if (pri->conn != NULL) - return vde_datafd(pri->conn); - - printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open"); - return -EINVAL; -} - -static void vde_remove(void *data) -{ - struct vde_data *pri = data; - - if (pri->conn != NULL) { - printk(UM_KERN_INFO "vde backend - closing connection\n"); - vde_close(pri->conn); - pri->conn = NULL; - kfree(pri->args); - pri->args = NULL; - return; - } - - printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove"); -} - -const struct net_user_info vde_user_info = { - .init = vde_user_init, - .open = vde_user_open, - .close = NULL, - .remove = vde_remove, - .add_address = NULL, - .delete_address = NULL, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; - -void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init) -{ - struct vde_open_args *args; - - vpri->args = uml_kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL); - if (vpri->args == NULL) { - printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args " - "allocation failed"); - return; - } - - args = vpri->args; - - args->port = init->port; - args->group = init->group; - args->mode = init->mode ? init->mode : 0700; - - args->port ? printk("port %d", args->port) : - printk("undefined port"); -} - -int vde_user_read(void *conn, void *buf, int len) -{ - VDECONN *vconn = conn; - int rv; - - if (vconn == NULL) - return 0; - - rv = vde_recv(vconn, buf, len, 0); - if (rv < 0) { - if (errno == EAGAIN) - return 0; - return -errno; - } - else if (rv == 0) - return -ENOTCONN; - - return rv; -} - -int vde_user_write(void *conn, void *buf, int len) -{ - VDECONN *vconn = conn; - - if (vconn == NULL) - return 0; - - return vde_send(vconn, buf, len, 0); -} - diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index b97bb52dd562..9bbbddfe866b 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -8,6 +8,8 @@ * Copyright (C) 2001 by various other people who didn't put their name here. */ +#define pr_fmt(fmt) "uml-vector: " fmt + #include <linux/memblock.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -27,7 +29,6 @@ #include <init.h> #include <irq_kern.h> #include <irq_user.h> -#include <net_kern.h> #include <os.h> #include "mconsole_kern.h" #include "vector_user.h" @@ -1533,13 +1534,47 @@ static const struct net_device_ops vector_netdev_ops = { static void vector_timer_expire(struct timer_list *t) { - struct vector_private *vp = from_timer(vp, t, tl); + struct vector_private *vp = timer_container_of(vp, t, tl); vp->estats.tx_kicks++; napi_schedule(&vp->napi); } +static void vector_setup_etheraddr(struct net_device *dev, char *str) +{ + u8 addr[ETH_ALEN]; + + if (str == NULL) + goto random; + + if (!mac_pton(str, addr)) { + netdev_err(dev, + "Failed to parse '%s' as an ethernet address\n", str); + goto random; + } + if (is_multicast_ether_addr(addr)) { + netdev_err(dev, + "Attempt to assign a multicast ethernet address to a device disallowed\n"); + goto random; + } + if (!is_valid_ether_addr(addr)) { + netdev_err(dev, + "Attempt to assign an invalid ethernet address to a device disallowed\n"); + goto random; + } + if (!is_local_ether_addr(addr)) { + netdev_warn(dev, "Warning: Assigning a globally valid ethernet address to a device\n"); + netdev_warn(dev, "You should set the 2nd rightmost bit in the first byte of the MAC,\n"); + netdev_warn(dev, "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n", + addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], addr[5]); + } + eth_hw_addr_set(dev, addr); + return; +random: + netdev_info(dev, "Choosing a random ethernet address\n"); + eth_hw_addr_random(dev); +} static void vector_eth_configure( int n, @@ -1553,14 +1588,12 @@ static void vector_eth_configure( device = kzalloc(sizeof(*device), GFP_KERNEL); if (device == NULL) { - printk(KERN_ERR "eth_configure failed to allocate struct " - "vector_device\n"); + pr_err("Failed to allocate struct vector_device for vec%d\n", n); return; } dev = alloc_etherdev(sizeof(struct vector_private)); if (dev == NULL) { - printk(KERN_ERR "eth_configure: failed to allocate struct " - "net_device for vec%d\n", n); + pr_err("Failed to allocate struct net_device for vec%d\n", n); goto out_free_device; } @@ -1574,7 +1607,7 @@ static void vector_eth_configure( * and fail. */ snprintf(dev->name, sizeof(dev->name), "vec%d", n); - uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac")); + vector_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac")); vp = netdev_priv(dev); /* sysfs register */ @@ -1592,35 +1625,19 @@ static void vector_eth_configure( device->dev = dev; - *vp = ((struct vector_private) - { - .list = LIST_HEAD_INIT(vp->list), - .dev = dev, - .unit = n, - .options = get_transport_options(def), - .rx_irq = 0, - .tx_irq = 0, - .parsed = def, - .max_packet = get_mtu(def) + ETH_HEADER_OTHER, - /* TODO - we need to calculate headroom so that ip header - * is 16 byte aligned all the time - */ - .headroom = get_headroom(def), - .form_header = NULL, - .verify_header = NULL, - .header_rxbuffer = NULL, - .header_txbuffer = NULL, - .header_size = 0, - .rx_header_size = 0, - .rexmit_scheduled = false, - .opened = false, - .transport_data = NULL, - .in_write_poll = false, - .coalesce = 2, - .req_size = get_req_size(def), - .in_error = false, - .bpf = NULL - }); + INIT_LIST_HEAD(&vp->list); + vp->dev = dev; + vp->unit = n; + vp->options = get_transport_options(def); + vp->parsed = def; + vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER; + /* + * TODO - we need to calculate headroom so that ip header + * is 16 byte aligned all the time + */ + vp->headroom = get_headroom(def); + vp->coalesce = 2; + vp->req_size = get_req_size(def); dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); INIT_WORK(&vp->reset_tx, vector_reset_tx); @@ -1690,8 +1707,7 @@ static int __init vector_setup(char *str) err = vector_parse(str, &n, &str, &error); if (err) { - printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n", - str, error); + pr_err("Couldn't parse '%s': %s\n", str, error); return 1; } new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES); diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c new file mode 100644 index 000000000000..13b971a2bd43 --- /dev/null +++ b/arch/um/drivers/vfio_kern.c @@ -0,0 +1,656 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie <tiwei.btw@antgroup.com> + */ + +#define pr_fmt(fmt) "vfio-uml: " fmt + +#include <linux/module.h> +#include <linux/logic_iomem.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/string.h> +#include <linux/unaligned.h> +#include <irq_kern.h> +#include <init.h> +#include <os.h> + +#include "virt-pci.h" +#include "vfio_user.h" + +#define to_vdev(_pdev) container_of(_pdev, struct uml_vfio_device, pdev) + +struct uml_vfio_intr_ctx { + struct uml_vfio_device *dev; + int irq; +}; + +struct uml_vfio_device { + const char *name; + int group; + + struct um_pci_device pdev; + struct uml_vfio_user_device udev; + struct uml_vfio_intr_ctx *intr_ctx; + + int msix_cap; + int msix_bar; + int msix_offset; + int msix_size; + u32 *msix_data; + + struct list_head list; +}; + +struct uml_vfio_group { + int id; + int fd; + int users; + struct list_head list; +}; + +static struct { + int fd; + int users; +} uml_vfio_container = { .fd = -1 }; +static DEFINE_MUTEX(uml_vfio_container_mtx); + +static LIST_HEAD(uml_vfio_groups); +static DEFINE_MUTEX(uml_vfio_groups_mtx); + +static LIST_HEAD(uml_vfio_devices); + +static int uml_vfio_set_container(int group_fd) +{ + int err; + + guard(mutex)(¨_vfio_container_mtx); + + err = uml_vfio_user_set_container(uml_vfio_container.fd, group_fd); + if (err) + return err; + + uml_vfio_container.users++; + if (uml_vfio_container.users > 1) + return 0; + + err = uml_vfio_user_setup_iommu(uml_vfio_container.fd); + if (err) { + uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd); + uml_vfio_container.users--; + } + return err; +} + +static void uml_vfio_unset_container(int group_fd) +{ + guard(mutex)(¨_vfio_container_mtx); + + uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd); + uml_vfio_container.users--; +} + +static int uml_vfio_open_group(int group_id) +{ + struct uml_vfio_group *group; + int err; + + guard(mutex)(¨_vfio_groups_mtx); + + list_for_each_entry(group, ¨_vfio_groups, list) { + if (group->id == group_id) { + group->users++; + return group->fd; + } + } + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return -ENOMEM; + + group->fd = uml_vfio_user_open_group(group_id); + if (group->fd < 0) { + err = group->fd; + goto free_group; + } + + err = uml_vfio_set_container(group->fd); + if (err) + goto close_group; + + group->id = group_id; + group->users = 1; + + list_add(&group->list, ¨_vfio_groups); + + return group->fd; + +close_group: + os_close_file(group->fd); +free_group: + kfree(group); + return err; +} + +static int uml_vfio_release_group(int group_fd) +{ + struct uml_vfio_group *group; + + guard(mutex)(¨_vfio_groups_mtx); + + list_for_each_entry(group, ¨_vfio_groups, list) { + if (group->fd == group_fd) { + group->users--; + if (group->users == 0) { + uml_vfio_unset_container(group_fd); + os_close_file(group_fd); + list_del(&group->list); + kfree(group); + } + return 0; + } + } + + return -ENOENT; +} + +static irqreturn_t uml_vfio_interrupt(int unused, void *opaque) +{ + struct uml_vfio_intr_ctx *ctx = opaque; + struct uml_vfio_device *dev = ctx->dev; + int index = ctx - dev->intr_ctx; + int irqfd = dev->udev.irqfd[index]; + int irq = dev->msix_data[index]; + uint64_t v; + int r; + + do { + r = os_read_file(irqfd, &v, sizeof(v)); + if (r == sizeof(v)) + generic_handle_irq(irq); + } while (r == sizeof(v) || r == -EINTR); + WARN(r != -EAGAIN, "read returned %d\n", r); + + return IRQ_HANDLED; +} + +static int uml_vfio_activate_irq(struct uml_vfio_device *dev, int index) +{ + struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index]; + int err, irqfd; + + if (ctx->irq >= 0) + return 0; + + irqfd = uml_vfio_user_activate_irq(&dev->udev, index); + if (irqfd < 0) + return irqfd; + + ctx->irq = um_request_irq(UM_IRQ_ALLOC, irqfd, IRQ_READ, + uml_vfio_interrupt, 0, + "vfio-uml", ctx); + if (ctx->irq < 0) { + err = ctx->irq; + goto deactivate; + } + + err = add_sigio_fd(irqfd); + if (err) + goto free_irq; + + return 0; + +free_irq: + um_free_irq(ctx->irq, ctx); + ctx->irq = -1; +deactivate: + uml_vfio_user_deactivate_irq(&dev->udev, index); + return err; +} + +static int uml_vfio_deactivate_irq(struct uml_vfio_device *dev, int index) +{ + struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index]; + + if (ctx->irq >= 0) { + ignore_sigio_fd(dev->udev.irqfd[index]); + um_free_irq(ctx->irq, ctx); + uml_vfio_user_deactivate_irq(&dev->udev, index); + ctx->irq = -1; + } + return 0; +} + +static int uml_vfio_update_msix_cap(struct uml_vfio_device *dev, + unsigned int offset, int size, + unsigned long val) +{ + /* + * Here, we handle only the operations we care about, + * ignoring the rest. + */ + if (size == 2 && offset == dev->msix_cap + PCI_MSIX_FLAGS) { + switch (val & ~PCI_MSIX_FLAGS_QSIZE) { + case PCI_MSIX_FLAGS_ENABLE: + case 0: + return uml_vfio_user_update_irqs(&dev->udev); + } + } + return 0; +} + +static int uml_vfio_update_msix_table(struct uml_vfio_device *dev, + unsigned int offset, int size, + unsigned long val) +{ + int index; + + /* + * Here, we handle only the operations we care about, + * ignoring the rest. + */ + offset -= dev->msix_offset + PCI_MSIX_ENTRY_DATA; + + if (size != 4 || offset % PCI_MSIX_ENTRY_SIZE != 0) + return 0; + + index = offset / PCI_MSIX_ENTRY_SIZE; + if (index >= dev->udev.irq_count) + return -EINVAL; + + dev->msix_data[index] = val; + + return val ? uml_vfio_activate_irq(dev, index) : + uml_vfio_deactivate_irq(dev, index); +} + +static unsigned long __uml_vfio_cfgspace_read(struct uml_vfio_device *dev, + unsigned int offset, int size) +{ + u8 data[8]; + + memset(data, 0xff, sizeof(data)); + + if (uml_vfio_user_cfgspace_read(&dev->udev, offset, data, size)) + return ULONG_MAX; + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static unsigned long uml_vfio_cfgspace_read(struct um_pci_device *pdev, + unsigned int offset, int size) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + + return __uml_vfio_cfgspace_read(dev, offset, size); +} + +static void __uml_vfio_cfgspace_write(struct uml_vfio_device *dev, + unsigned int offset, int size, + unsigned long val) +{ + u8 data[8]; + + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + } + + WARN_ON(uml_vfio_user_cfgspace_write(&dev->udev, offset, data, size)); +} + +static void uml_vfio_cfgspace_write(struct um_pci_device *pdev, + unsigned int offset, int size, + unsigned long val) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + + if (offset < dev->msix_cap + PCI_CAP_MSIX_SIZEOF && + offset + size > dev->msix_cap) + WARN_ON(uml_vfio_update_msix_cap(dev, offset, size, val)); + + __uml_vfio_cfgspace_write(dev, offset, size, val); +} + +static void uml_vfio_bar_copy_from(struct um_pci_device *pdev, int bar, + void *buffer, unsigned int offset, int size) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + + memset(buffer, 0xff, size); + uml_vfio_user_bar_read(&dev->udev, bar, offset, buffer, size); +} + +static unsigned long uml_vfio_bar_read(struct um_pci_device *pdev, int bar, + unsigned int offset, int size) +{ + u8 data[8]; + + uml_vfio_bar_copy_from(pdev, bar, data, offset, size); + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ULONG_MAX; + } +} + +static void uml_vfio_bar_copy_to(struct um_pci_device *pdev, int bar, + unsigned int offset, const void *buffer, + int size) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + + uml_vfio_user_bar_write(&dev->udev, bar, offset, buffer, size); +} + +static void uml_vfio_bar_write(struct um_pci_device *pdev, int bar, + unsigned int offset, int size, + unsigned long val) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + u8 data[8]; + + if (bar == dev->msix_bar && offset + size > dev->msix_offset && + offset < dev->msix_offset + dev->msix_size) + WARN_ON(uml_vfio_update_msix_table(dev, offset, size, val)); + + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + } + + uml_vfio_bar_copy_to(pdev, bar, offset, data, size); +} + +static void uml_vfio_bar_set(struct um_pci_device *pdev, int bar, + unsigned int offset, u8 value, int size) +{ + struct uml_vfio_device *dev = to_vdev(pdev); + int i; + + for (i = 0; i < size; i++) + uml_vfio_user_bar_write(&dev->udev, bar, offset + i, &value, 1); +} + +static const struct um_pci_ops uml_vfio_um_pci_ops = { + .cfgspace_read = uml_vfio_cfgspace_read, + .cfgspace_write = uml_vfio_cfgspace_write, + .bar_read = uml_vfio_bar_read, + .bar_write = uml_vfio_bar_write, + .bar_copy_from = uml_vfio_bar_copy_from, + .bar_copy_to = uml_vfio_bar_copy_to, + .bar_set = uml_vfio_bar_set, +}; + +static u8 uml_vfio_find_capability(struct uml_vfio_device *dev, u8 cap) +{ + u8 id, pos; + u16 ent; + int ttl = 48; /* PCI_FIND_CAP_TTL */ + + pos = __uml_vfio_cfgspace_read(dev, PCI_CAPABILITY_LIST, sizeof(pos)); + + while (pos && ttl--) { + ent = __uml_vfio_cfgspace_read(dev, pos, sizeof(ent)); + + id = ent & 0xff; + if (id == 0xff) + break; + if (id == cap) + return pos; + + pos = ent >> 8; + } + + return 0; +} + +static int uml_vfio_read_msix_table(struct uml_vfio_device *dev) +{ + unsigned int off; + u16 flags; + u32 tbl; + + off = uml_vfio_find_capability(dev, PCI_CAP_ID_MSIX); + if (!off) + return -ENOTSUPP; + + dev->msix_cap = off; + + tbl = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_TABLE, sizeof(tbl)); + flags = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_FLAGS, sizeof(flags)); + + dev->msix_bar = tbl & PCI_MSIX_TABLE_BIR; + dev->msix_offset = tbl & PCI_MSIX_TABLE_OFFSET; + dev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * PCI_MSIX_ENTRY_SIZE; + + dev->msix_data = kzalloc(dev->msix_size, GFP_KERNEL); + if (!dev->msix_data) + return -ENOMEM; + + return 0; +} + +static void uml_vfio_open_device(struct uml_vfio_device *dev) +{ + struct uml_vfio_intr_ctx *ctx; + int err, group_id, i; + + group_id = uml_vfio_user_get_group_id(dev->name); + if (group_id < 0) { + pr_err("Failed to get group id (%s), error %d\n", + dev->name, group_id); + goto free_dev; + } + + dev->group = uml_vfio_open_group(group_id); + if (dev->group < 0) { + pr_err("Failed to open group %d (%s), error %d\n", + group_id, dev->name, dev->group); + goto free_dev; + } + + err = uml_vfio_user_setup_device(&dev->udev, dev->group, dev->name); + if (err) { + pr_err("Failed to setup device (%s), error %d\n", + dev->name, err); + goto release_group; + } + + err = uml_vfio_read_msix_table(dev); + if (err) { + pr_err("Failed to read MSI-X table (%s), error %d\n", + dev->name, err); + goto teardown_udev; + } + + dev->intr_ctx = kmalloc_array(dev->udev.irq_count, + sizeof(struct uml_vfio_intr_ctx), + GFP_KERNEL); + if (!dev->intr_ctx) { + pr_err("Failed to allocate interrupt context (%s)\n", + dev->name); + goto free_msix; + } + + for (i = 0; i < dev->udev.irq_count; i++) { + ctx = &dev->intr_ctx[i]; + ctx->dev = dev; + ctx->irq = -1; + } + + dev->pdev.ops = ¨_vfio_um_pci_ops; + + err = um_pci_device_register(&dev->pdev); + if (err) { + pr_err("Failed to register UM PCI device (%s), error %d\n", + dev->name, err); + goto free_intr_ctx; + } + + return; + +free_intr_ctx: + kfree(dev->intr_ctx); +free_msix: + kfree(dev->msix_data); +teardown_udev: + uml_vfio_user_teardown_device(&dev->udev); +release_group: + uml_vfio_release_group(dev->group); +free_dev: + list_del(&dev->list); + kfree(dev->name); + kfree(dev); +} + +static void uml_vfio_release_device(struct uml_vfio_device *dev) +{ + int i; + + for (i = 0; i < dev->udev.irq_count; i++) + uml_vfio_deactivate_irq(dev, i); + uml_vfio_user_update_irqs(&dev->udev); + + um_pci_device_unregister(&dev->pdev); + kfree(dev->intr_ctx); + kfree(dev->msix_data); + uml_vfio_user_teardown_device(&dev->udev); + uml_vfio_release_group(dev->group); + list_del(&dev->list); + kfree(dev->name); + kfree(dev); +} + +static struct uml_vfio_device *uml_vfio_find_device(const char *device) +{ + struct uml_vfio_device *dev; + + list_for_each_entry(dev, ¨_vfio_devices, list) { + if (!strcmp(dev->name, device)) + return dev; + } + return NULL; +} + +static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp) +{ + struct uml_vfio_device *dev; + int fd; + + if (uml_vfio_container.fd < 0) { + fd = uml_vfio_user_open_container(); + if (fd < 0) + return fd; + uml_vfio_container.fd = fd; + } + + if (uml_vfio_find_device(device)) + return -EEXIST; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->name = kstrdup(device, GFP_KERNEL); + if (!dev->name) { + kfree(dev); + return -ENOMEM; + } + + list_add_tail(&dev->list, ¨_vfio_devices); + return 0; +} + +static int uml_vfio_cmdline_get(char *buffer, const struct kernel_param *kp) +{ + return 0; +} + +static const struct kernel_param_ops uml_vfio_cmdline_param_ops = { + .set = uml_vfio_cmdline_set, + .get = uml_vfio_cmdline_get, +}; + +device_param_cb(device, ¨_vfio_cmdline_param_ops, NULL, 0400); +__uml_help(uml_vfio_cmdline_param_ops, +"vfio_uml.device=<domain:bus:slot.function>\n" +" Pass through a PCI device to UML via VFIO. Currently, only MSI-X\n" +" capable devices are supported, and it is assumed that drivers will\n" +" use MSI-X. This parameter can be specified multiple times to pass\n" +" through multiple PCI devices to UML.\n\n" +); + +static int __init uml_vfio_init(void) +{ + struct uml_vfio_device *dev, *n; + + sigio_broken(); + + /* If the opening fails, the device will be released. */ + list_for_each_entry_safe(dev, n, ¨_vfio_devices, list) + uml_vfio_open_device(dev); + + return 0; +} +late_initcall(uml_vfio_init); + +static void __exit uml_vfio_exit(void) +{ + struct uml_vfio_device *dev, *n; + + list_for_each_entry_safe(dev, n, ¨_vfio_devices, list) + uml_vfio_release_device(dev); + + if (uml_vfio_container.fd >= 0) + os_close_file(uml_vfio_container.fd); +} +module_exit(uml_vfio_exit); diff --git a/arch/um/drivers/vfio_user.c b/arch/um/drivers/vfio_user.c new file mode 100644 index 000000000000..6a45d8e14582 --- /dev/null +++ b/arch/um/drivers/vfio_user.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie <tiwei.btw@antgroup.com> + */ +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/eventfd.h> +#include <linux/limits.h> +#include <linux/vfio.h> +#include <linux/pci_regs.h> +#include <as-layout.h> +#include <um_malloc.h> + +#include "vfio_user.h" + +int uml_vfio_user_open_container(void) +{ + int r, fd; + + fd = open("/dev/vfio/vfio", O_RDWR); + if (fd < 0) + return -errno; + + r = ioctl(fd, VFIO_GET_API_VERSION); + if (r != VFIO_API_VERSION) { + r = r < 0 ? -errno : -EINVAL; + goto error; + } + + r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU); + if (r <= 0) { + r = r < 0 ? -errno : -EINVAL; + goto error; + } + + return fd; + +error: + close(fd); + return r; +} + +int uml_vfio_user_setup_iommu(int container) +{ + /* + * This is a bit tricky. See the big comment in + * vhost_user_set_mem_table() in virtio_uml.c. + */ + unsigned long reserved = uml_reserved - uml_physmem; + struct vfio_iommu_type1_dma_map dma_map = { + .argsz = sizeof(dma_map), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .vaddr = uml_reserved, + .iova = reserved, + .size = physmem_size - reserved, + }; + + if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0) + return -errno; + + if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0) + return -errno; + + return 0; +} + +int uml_vfio_user_get_group_id(const char *device) +{ + char *path, *buf, *end; + const char *name; + int r; + + path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); + if (!path) + return -ENOMEM; + + sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device); + + buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL); + if (!buf) { + r = -ENOMEM; + goto free_path; + } + + r = readlink(path, buf, PATH_MAX); + if (r < 0) { + r = -errno; + goto free_buf; + } + buf[r] = '\0'; + + name = basename(buf); + + r = strtoul(name, &end, 10); + if (*end != '\0' || end == name) { + r = -EINVAL; + goto free_buf; + } + +free_buf: + kfree(buf); +free_path: + kfree(path); + return r; +} + +int uml_vfio_user_open_group(int group_id) +{ + char *path; + int fd; + + path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); + if (!path) + return -ENOMEM; + + sprintf(path, "/dev/vfio/%d", group_id); + + fd = open(path, O_RDWR); + if (fd < 0) { + fd = -errno; + goto out; + } + +out: + kfree(path); + return fd; +} + +int uml_vfio_user_set_container(int container, int group) +{ + if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0) + return -errno; + return 0; +} + +int uml_vfio_user_unset_container(int container, int group) +{ + if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0) + return -errno; + return 0; +} + +static int vfio_set_irqs(int device, int start, int count, int *irqfd) +{ + struct vfio_irq_set *irq_set; + int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count; + int err = 0; + + irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL); + if (!irq_set) + return -ENOMEM; + + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = start; + irq_set->count = count; + memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count); + + if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) { + err = -errno; + goto out; + } + +out: + kfree(irq_set); + return err; +} + +int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev, + int group, const char *device) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; + int err, i; + + dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device); + if (dev->device < 0) + return -errno; + + if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) { + err = -errno; + goto close_device; + } + + dev->num_regions = device_info.num_regions; + if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1) + dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1; + + dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions, + UM_GFP_KERNEL); + if (!dev->region) { + err = -ENOMEM; + goto close_device; + } + + for (i = 0; i < dev->num_regions; i++) { + struct vfio_region_info region = { + .argsz = sizeof(region), + .index = i, + }; + if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, ®ion) < 0) { + err = -errno; + goto free_region; + } + dev->region[i].size = region.size; + dev->region[i].offset = region.offset; + } + + /* Only MSI-X is supported currently. */ + irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX; + if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) { + err = -errno; + goto free_region; + } + + dev->irq_count = irq_info.count; + + dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL); + if (!dev->irqfd) { + err = -ENOMEM; + goto free_region; + } + + memset(dev->irqfd, -1, sizeof(int) * dev->irq_count); + + err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd); + if (err) + goto free_irqfd; + + return 0; + +free_irqfd: + kfree(dev->irqfd); +free_region: + kfree(dev->region); +close_device: + close(dev->device); + return err; +} + +void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev) +{ + kfree(dev->irqfd); + kfree(dev->region); + close(dev->device); +} + +int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index) +{ + int irqfd; + + irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (irqfd < 0) + return -errno; + + dev->irqfd[index] = irqfd; + return irqfd; +} + +void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index) +{ + close(dev->irqfd[index]); + dev->irqfd[index] = -1; +} + +int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev) +{ + return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd); +} + +static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index, + uint64_t offset, void *buf, uint64_t size) +{ + if (index >= dev->num_regions || offset + size > dev->region[index].size) + return -EINVAL; + + if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0) + return -errno; + + return 0; +} + +static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index, + uint64_t offset, const void *buf, uint64_t size) +{ + if (index >= dev->num_regions || offset + size > dev->region[index].size) + return -EINVAL; + + if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0) + return -errno; + + return 0; +} + +int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev, + unsigned int offset, void *buf, int size) +{ + return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX, + offset, buf, size); +} + +int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev, + unsigned int offset, const void *buf, int size) +{ + return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX, + offset, buf, size); +} + +int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar, + unsigned int offset, void *buf, int size) +{ + return vfio_region_read(dev, bar, offset, buf, size); +} + +int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar, + unsigned int offset, const void *buf, int size) +{ + return vfio_region_write(dev, bar, offset, buf, size); +} diff --git a/arch/um/drivers/vfio_user.h b/arch/um/drivers/vfio_user.h new file mode 100644 index 000000000000..75535e05059b --- /dev/null +++ b/arch/um/drivers/vfio_user.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_VFIO_USER_H +#define __UM_VFIO_USER_H + +struct uml_vfio_user_device { + int device; + + struct { + uint64_t size; + uint64_t offset; + } *region; + int num_regions; + + int32_t *irqfd; + int irq_count; +}; + +int uml_vfio_user_open_container(void); +int uml_vfio_user_setup_iommu(int container); + +int uml_vfio_user_get_group_id(const char *device); +int uml_vfio_user_open_group(int group_id); +int uml_vfio_user_set_container(int container, int group); +int uml_vfio_user_unset_container(int container, int group); + +int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev, + int group, const char *device); +void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev); + +int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index); +void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index); +int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev); + +int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev, + unsigned int offset, void *buf, int size); +int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev, + unsigned int offset, const void *buf, int size); + +int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar, + unsigned int offset, void *buf, int size); +int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar, + unsigned int offset, const void *buf, int size); + +#endif /* __UM_VFIO_USER_H */ diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index b83b5a765d4e..0fe207ca4b72 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -538,11 +538,6 @@ void um_pci_platform_device_unregister(struct um_pci_device *dev) static int __init um_pci_init(void) { - struct irq_domain_info inner_domain_info = { - .size = MAX_MSI_VECTORS, - .hwirq_max = MAX_MSI_VECTORS, - .ops = &um_pci_inner_domain_ops, - }; int err, i; WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource, @@ -564,10 +559,10 @@ static int __init um_pci_init(void) goto free; } - inner_domain_info.fwnode = um_pci_fwnode; - um_pci_inner_domain = irq_domain_instantiate(&inner_domain_info); - if (IS_ERR(um_pci_inner_domain)) { - err = PTR_ERR(um_pci_inner_domain); + um_pci_inner_domain = irq_domain_create_linear(um_pci_fwnode, MAX_MSI_VECTORS, + &um_pci_inner_domain_ops, NULL); + if (!um_pci_inner_domain) { + err = -ENOMEM; goto free; } @@ -602,7 +597,7 @@ static int __init um_pci_init(void) return 0; free: - if (!IS_ERR_OR_NULL(um_pci_inner_domain)) + if (um_pci_inner_domain) irq_domain_remove(um_pci_inner_domain); if (um_pci_fwnode) irq_domain_free_fwnode(um_pci_fwnode); diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c index e4316c7981e8..d05918e422f9 100644 --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -81,7 +81,7 @@ __uml_setup("xterm=", xterm_setup, " '<switch> command arg1 arg2 ...'.\n" " The default values are 'xterm=" CONFIG_XTERM_CHAN_DEFAULT_EMULATOR ",-T,-e'.\n" -" Values for gnome-terminal are 'xterm=gnome-terminal,-t,-x'.\n\n" +" Values for gnome-terminal are 'xterm=gnome-terminal,-t,--'.\n\n" ); static int xterm_open(int input, int output, int primary, void *d, @@ -97,12 +97,9 @@ static int xterm_open(int input, int output, int primary, void *d, if (access(argv[4], X_OK) < 0) argv[4] = "port-helper"; - /* - * Check that DISPLAY is set, this doesn't guarantee the xterm - * will work but w/o it we can be pretty sure it won't. - */ - if (getenv("DISPLAY") == NULL) { - printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n"); + /* Ensure we are running on Xorg or Wayland. */ + if (!getenv("DISPLAY") && !getenv("WAYLAND_DISPLAY")) { + printk(UM_KERN_ERR "xterm_open : neither $DISPLAY nor $WAYLAND_DISPLAY is set.\n"); return -ENODEV; } diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h index 5898a26daa0d..408b31d59127 100644 --- a/arch/um/include/asm/asm-prototypes.h +++ b/arch/um/include/asm/asm-prototypes.h @@ -1 +1,6 @@ #include <asm-generic/asm-prototypes.h> +#include <asm/checksum.h> + +#ifdef CONFIG_UML_X86 +extern void cmpxchg8b_emu(void); +#endif diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index 749dfe8512e8..36dbedd1af48 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -13,17 +13,18 @@ #define TELNETD_IRQ 8 #define XTERM_IRQ 9 #define RANDOM_IRQ 10 +#define SIGCHLD_IRQ 11 #ifdef CONFIG_UML_NET_VECTOR -#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1) +#define VECTOR_BASE_IRQ (SIGCHLD_IRQ + 1) #define VECTOR_IRQ_SPACE 8 #define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ) #else -#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1) +#define UM_FIRST_DYN_IRQ (SIGCHLD_IRQ + 1) #endif diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h index a3eaca41ff61..4d0e4239f3cc 100644 --- a/arch/um/include/asm/mmu.h +++ b/arch/um/include/asm/mmu.h @@ -6,11 +6,14 @@ #ifndef __ARCH_UM_MMU_H #define __ARCH_UM_MMU_H +#include "linux/types.h" #include <mm_id.h> typedef struct mm_context { struct mm_id id; + struct list_head list; + /* Address range in need of a TLB sync */ unsigned long sync_tlb_range_from; unsigned long sync_tlb_range_to; diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index ca2a519d53ab..24fdea6f88c3 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -314,7 +314,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE); } diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 73f3a4792ed8..8ca66a1918c3 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -14,3 +14,7 @@ DEFINE(UM_THREAD_SIZE, THREAD_SIZE); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); + +DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); + +DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE); diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index 88835b52ae2b..746abc24a5d5 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -17,6 +17,8 @@ enum um_irq_type { struct siginfo; extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, void *mc); +extern void sigchld_handler(int sig, struct siginfo *unused_si, + struct uml_pt_regs *regs, void *mc); void sigio_run_timetravel_handlers(void); extern void free_irq_by_fd(int fd); extern void deactivate_fd(int fd, int irqnum); diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h deleted file mode 100644 index 67b2e9a1f2e5..000000000000 --- a/arch/um/include/shared/net_kern.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __UM_NET_KERN_H -#define __UM_NET_KERN_H - -#include <linux/netdevice.h> -#include <linux/platform_device.h> -#include <linux/skbuff.h> -#include <linux/socket.h> -#include <linux/list.h> -#include <linux/workqueue.h> - -struct uml_net { - struct list_head list; - struct net_device *dev; - struct platform_device pdev; - int index; -}; - -struct uml_net_private { - struct list_head list; - spinlock_t lock; - struct net_device *dev; - struct timer_list tl; - - struct work_struct work; - int fd; - unsigned char mac[ETH_ALEN]; - int max_packet; - unsigned short (*protocol)(struct sk_buff *); - int (*open)(void *); - void (*close)(int, void *); - void (*remove)(void *); - int (*read)(int, struct sk_buff *skb, struct uml_net_private *); - int (*write)(int, struct sk_buff *skb, struct uml_net_private *); - - void (*add_address)(unsigned char *, unsigned char *, void *); - void (*delete_address)(unsigned char *, unsigned char *, void *); - char user[]; -}; - -struct net_kern_info { - void (*init)(struct net_device *, void *); - unsigned short (*protocol)(struct sk_buff *); - int (*read)(int, struct sk_buff *skb, struct uml_net_private *); - int (*write)(int, struct sk_buff *skb, struct uml_net_private *); -}; - -struct transport { - struct list_head list; - const char *name; - int (* const setup)(char *, char **, void *); - const struct net_user_info *user; - const struct net_kern_info *kern; - const int private_size; - const int setup_size; -}; - -extern int tap_setup_common(char *str, char *type, char **dev_name, - char **mac_out, char **gate_addr); -extern void register_transport(struct transport *new); -extern unsigned short eth_protocol(struct sk_buff *skb); -extern void uml_net_setup_etheraddr(struct net_device *dev, char *str); - - -#endif diff --git a/arch/um/include/shared/net_user.h b/arch/um/include/shared/net_user.h deleted file mode 100644 index ba92a4d93531..000000000000 --- a/arch/um/include/shared/net_user.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __UM_NET_USER_H__ -#define __UM_NET_USER_H__ - -#define ETH_ADDR_LEN (6) -#define ETH_HEADER_ETHERTAP (16) -#define ETH_HEADER_OTHER (26) /* 14 for ethernet + VLAN + MPLS for crazy people */ -#define ETH_MAX_PACKET (1500) - -#define UML_NET_VERSION (4) - -struct net_user_info { - int (*init)(void *, void *); - int (*open)(void *); - void (*close)(int, void *); - void (*remove)(void *); - void (*add_address)(unsigned char *, unsigned char *, void *); - void (*delete_address)(unsigned char *, unsigned char *, void *); - int max_packet; - int mtu; -}; - -extern void iter_addresses(void *d, void (*cb)(unsigned char *, - unsigned char *, void *), - void *arg); - -extern void *get_output_buffer(int *len_out); -extern void free_output_buffer(void *buffer); - -extern int tap_open_common(void *dev, char *gate_addr); -extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr); - -extern void read_output(int fd, char *output_out, int len); - -extern int net_read(int fd, void *buf, int len); -extern int net_recvfrom(int fd, void *buf, int len); -extern int net_write(int fd, void *buf, int len); -extern int net_send(int fd, void *buf, int len); -extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len); - -extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg); -extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg); - -extern char *split_if_spec(char *str, ...); - -extern int dev_netmask(void *d, void *m); - -#endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 152a60080d5b..b35cc8ce333b 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -143,7 +143,6 @@ extern int os_access(const char *file, int mode); extern int os_set_exec_close(int fd); extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); extern int os_get_ifname(int fd, char *namebuf); -extern int os_set_slip(int fd); extern int os_mode_fd(int fd, int mode); extern int os_seek_file(int fd, unsigned long long offset); @@ -198,6 +197,7 @@ extern int create_mem_file(unsigned long long len); extern void report_enomem(void); /* process.c */ +pid_t os_reap_child(void); extern void os_alarm_process(int pid); extern void os_kill_process(int pid, int reap_child); extern void os_kill_ptraced_process(int pid, int reap_child); @@ -286,7 +286,7 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len); /* skas/process.c */ extern int is_skas_winch(int pid, int fd, void *data); -extern int start_userspace(unsigned long stub_stack); +extern int start_userspace(struct mm_id *mm_id); extern void userspace(struct uml_pt_regs *regs); extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); extern void switch_threads(jmp_buf *me, jmp_buf *you); diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index 140388c282f6..89df9a55fbea 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -6,12 +6,21 @@ #ifndef __MM_ID_H #define __MM_ID_H +#define STUB_MAX_FDS 4 + struct mm_id { int pid; unsigned long stack; int syscall_data_len; + + /* Only used with SECCOMP mode */ + int sock; + int syscall_fd_num; + int syscall_fd_map[STUB_MAX_FDS]; }; void __switch_mm(struct mm_id *mm_idp); +void notify_mm_kill(int pid); + #endif diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h index 85c50122ab98..7d1de4cab551 100644 --- a/arch/um/include/shared/skas/skas.h +++ b/arch/um/include/shared/skas/skas.h @@ -8,6 +8,7 @@ #include <sysdep/ptrace.h> +extern int using_seccomp; extern int userspace_pid[]; extern void new_thread_handler(void); diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index 81a4cace032c..c261a77a32f6 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -11,8 +11,15 @@ #include <linux/compiler_types.h> #include <as-layout.h> #include <sysdep/tls.h> +#include <sysdep/stub-data.h> +#include <mm_id.h> + +#define FUTEX_IN_CHILD 0 +#define FUTEX_IN_KERN 1 struct stub_init_data { + int seccomp; + unsigned long stub_start; int stub_code_fd; @@ -20,7 +27,8 @@ struct stub_init_data { int stub_data_fd; unsigned long stub_data_offset; - unsigned long segv_handler; + unsigned long signal_handler; + unsigned long signal_restorer; }; #define STUB_NEXT_SYSCALL(s) \ @@ -52,6 +60,16 @@ struct stub_data { /* 128 leaves enough room for additional fields in the struct */ struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16); + /* data shared with signal handler (only used in seccomp mode) */ + short restart_wait; + unsigned int futex; + int signal; + unsigned short si_offset; + unsigned short mctx_offset; + + /* seccomp architecture specific state restore */ + struct stub_data_arch arch_data; + /* Stack for our signal handlers and for calling into . */ unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE); }; diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 4df1cd0d2017..b8f4e9281599 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -12,7 +12,7 @@ CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \ -DELF_ARCH=$(LDS_ELF_ARCH) \ -DELF_FORMAT=$(LDS_ELF_FORMAT) \ $(LDS_EXTRA) -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ physmem.o process.o ptrace.o reboot.o sigio.o \ @@ -25,7 +25,6 @@ obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_OF) += dtb.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o USER_OBJS := config.o diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c deleted file mode 100644 index 7220615b3beb..000000000000 --- a/arch/um/kernel/ioport.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2021 Intel Corporation - * Author: Johannes Berg <johannes@sipsolutions.net> - */ -#include <asm/iomap.h> -#include <asm-generic/pci_iomap.h> - -void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port, - unsigned int nr) -{ - return NULL; -} diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index abe8f30a521c..0dfaf96bb7da 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -690,3 +690,9 @@ void __init init_IRQ(void) /* Initialize EPOLL Loop */ os_setup_epoll(); } + +void sigchld_handler(int sig, struct siginfo *unused_si, + struct uml_pt_regs *regs, void *mc) +{ + do_IRQ(SIGCHLD_IRQ, regs); +} diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 0eb5a1d3ba70..849fafa4b54f 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -8,6 +8,7 @@ #include <linux/sched/signal.h> #include <linux/slab.h> +#include <shared/irq_kern.h> #include <asm/pgalloc.h> #include <asm/sections.h> #include <asm/mmu_context.h> @@ -19,6 +20,9 @@ /* Ensure the stub_data struct covers the allocated area */ static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); +spinlock_t mm_list_lock; +struct list_head mm_list; + int init_new_context(struct task_struct *task, struct mm_struct *mm) { struct mm_id *new_id = &mm->context.id; @@ -31,14 +35,14 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) new_id->stack = stack; - block_signals_trace(); - new_id->pid = start_userspace(stack); - unblock_signals_trace(); + scoped_guard(spinlock_irqsave, &mm_list_lock) { + /* Insert into list, used for lookups when the child dies */ + list_add(&mm->context.list, &mm_list); + } - if (new_id->pid < 0) { - ret = new_id->pid; + ret = start_userspace(new_id); + if (ret < 0) goto out_free; - } /* Ensure the new MM is clean and nothing unwanted is mapped */ unmap(new_id, 0, STUB_START); @@ -60,13 +64,82 @@ void destroy_context(struct mm_struct *mm) * zero, resulting in a kill(0), which will result in the * whole UML suddenly dying. Also, cover negative and * 1 cases, since they shouldn't happen either. + * + * Negative cases happen if the child died unexpectedly. */ - if (mmu->id.pid < 2) { + if (mmu->id.pid >= 0 && mmu->id.pid < 2) { printk(KERN_ERR "corrupt mm_context - pid = %d\n", mmu->id.pid); return; } - os_kill_ptraced_process(mmu->id.pid, 1); + + if (mmu->id.pid > 0) { + os_kill_ptraced_process(mmu->id.pid, 1); + mmu->id.pid = -1; + } + + if (using_seccomp && mmu->id.sock) + os_close_file(mmu->id.sock); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); + + guard(spinlock_irqsave)(&mm_list_lock); + + list_del(&mm->context.list); +} + +static irqreturn_t mm_sigchld_irq(int irq, void* dev) +{ + struct mm_context *mm_context; + pid_t pid; + + guard(spinlock)(&mm_list_lock); + + while ((pid = os_reap_child()) > 0) { + /* + * A child died, check if we have an MM with the PID. This is + * only relevant in SECCOMP mode (as ptrace will fail anyway). + * + * See wait_stub_done_seccomp for more details. + */ + list_for_each_entry(mm_context, &mm_list, list) { + if (mm_context->id.pid == pid) { + struct stub_data *stub_data; + printk("Unexpectedly lost MM child! Affected tasks will segfault."); + + /* Marks the MM as dead */ + mm_context->id.pid = -1; + + /* + * NOTE: If SMP is implemented, a futex_wake + * needs to be added here. + */ + stub_data = (void *)mm_context->id.stack; + stub_data->futex = FUTEX_IN_KERN; + + /* + * NOTE: Currently executing syscalls by + * affected tasks may finish normally. + */ + break; + } + } + } + + return IRQ_HANDLED; +} + +static int __init init_child_tracking(void) +{ + int err; + + spin_lock_init(&mm_list_lock); + INIT_LIST_HEAD(&mm_list); + + err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL); + if (err < 0) + panic("Failed to register SIGCHLD IRQ: %d", err); + + return 0; } +early_initcall(init_child_tracking) diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c index 796fc266d3bb..67cab46a602c 100644 --- a/arch/um/kernel/skas/stub.c +++ b/arch/um/kernel/skas/stub.c @@ -5,21 +5,54 @@ #include <sysdep/stub.h> -static __always_inline int syscall_handler(struct stub_data *d) +#include <linux/futex.h> +#include <sys/socket.h> +#include <errno.h> + +/* + * Known security issues + * + * Userspace can jump to this address to execute *any* syscall that is + * permitted by the stub. As we will return afterwards, it can do + * whatever it likes, including: + * - Tricking the kernel into handing out the memory FD + * - Using this memory FD to read/write all physical memory + * - Running in parallel to the kernel processing a syscall + * (possibly creating data races?) + * - Blocking e.g. SIGALRM to avoid time based scheduling + * + * To avoid this, the permitted location for each syscall needs to be + * checked for in the SECCOMP filter (which is reasonably simple). Also, + * more care will need to go into considerations how the code might be + * tricked by using a prepared stack (or even modifying the stack from + * another thread in case SMP support is added). + * + * As for the SIGALRM, the best counter measure will be to check in the + * kernel that the process is reporting back the SIGALRM in a timely + * fashion. + */ +static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS]) { + struct stub_data *d = get_stub_data(); int i; unsigned long res; + int fd; for (i = 0; i < d->syscall_data_len; i++) { struct stub_syscall *sc = &d->syscall_data[i]; switch (sc->syscall) { case STUB_SYSCALL_MMAP: + if (fd_map) + fd = fd_map[sc->mem.fd]; + else + fd = sc->mem.fd; + res = stub_syscall6(STUB_MMAP_NR, sc->mem.addr, sc->mem.length, sc->mem.prot, MAP_SHARED | MAP_FIXED, - sc->mem.fd, sc->mem.offset); + fd, sc->mem.offset); if (res != sc->mem.addr) { d->err = res; d->syscall_data_len = i; @@ -51,9 +84,98 @@ static __always_inline int syscall_handler(struct stub_data *d) void __section(".__syscall_stub") stub_syscall_handler(void) { + syscall_handler(NULL); + + trap_myself(); +} + +void __section(".__syscall_stub") +stub_signal_interrupt(int sig, siginfo_t *info, void *p) +{ struct stub_data *d = get_stub_data(); + char rcv_data; + union { + char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)]; + struct cmsghdr align; + } ctrl = {}; + struct iovec iov = { + .iov_base = &rcv_data, + .iov_len = 1, + }; + struct msghdr msghdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &ctrl, + .msg_controllen = sizeof(ctrl), + }; + ucontext_t *uc = p; + struct cmsghdr *fd_msg; + int *fd_map; + int num_fds; + long res; - syscall_handler(d); + d->signal = sig; + d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0]; + d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0]; - trap_myself(); +restart_wait: + d->futex = FUTEX_IN_KERN; + do { + res = stub_syscall3(__NR_futex, (unsigned long)&d->futex, + FUTEX_WAKE, 1); + } while (res == -EINTR); + + do { + res = stub_syscall4(__NR_futex, (unsigned long)&d->futex, + FUTEX_WAIT, FUTEX_IN_KERN, 0); + } while (res == -EINTR || d->futex == FUTEX_IN_KERN); + + if (res < 0 && res != -EAGAIN) + stub_syscall1(__NR_exit_group, 1); + + if (d->syscall_data_len) { + /* Read passed FDs (if any) */ + do { + res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0); + } while (res == -EINTR); + + /* We should never have a receive error (other than -EAGAIN) */ + if (res < 0 && res != -EAGAIN) + stub_syscall1(__NR_exit_group, 1); + + /* Receive the FDs */ + num_fds = 0; + fd_msg = msghdr.msg_control; + fd_map = (void *)&CMSG_DATA(fd_msg); + if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr)) + num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + + /* Try running queued syscalls. */ + res = syscall_handler(fd_map); + + while (num_fds) + stub_syscall2(__NR_close, fd_map[--num_fds], 0); + } else { + res = 0; + } + + if (res < 0 || d->restart_wait) { + /* Report SIGSYS if we restart. */ + d->signal = SIGSYS; + d->restart_wait = 0; + + goto restart_wait; + } + + /* Restore arch dependent state that is not part of the mcontext */ + stub_seccomp_restore_state(&d->arch_data); + + /* Return so that the host modified mcontext is restored. */ +} + +void __section(".__syscall_stub") +stub_signal_restorer(void) +{ + /* We must not have anything on the stack when doing rt_sigreturn */ + stub_syscall0(__NR_rt_sigreturn); } diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c index 23c99b285e82..cbafaa684e66 100644 --- a/arch/um/kernel/skas/stub_exe.c +++ b/arch/um/kernel/skas/stub_exe.c @@ -1,8 +1,12 @@ #include <sys/ptrace.h> #include <sys/prctl.h> +#include <sys/fcntl.h> #include <asm/unistd.h> #include <sysdep/stub.h> #include <stub-data.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <generated/asm-offsets.h> void _start(void); @@ -25,8 +29,6 @@ noinline static void real_init(void) } sa = { /* Need to set SA_RESTORER (but the handler never returns) */ .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000, - /* no need to mask any signals */ - .sa_mask = 0, }; /* set a nice name */ @@ -35,13 +37,20 @@ noinline static void real_init(void) /* Make sure this process dies if the kernel dies */ stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL); + /* Needed in SECCOMP mode (and safe to do anyway) */ + stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + /* read information from STDIN and close it */ res = stub_syscall3(__NR_read, 0, (unsigned long)&init_data, sizeof(init_data)); if (res != sizeof(init_data)) stub_syscall1(__NR_exit, 10); - stub_syscall1(__NR_close, 0); + /* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */ + if (!init_data.seccomp) + stub_syscall1(__NR_close, 0); + else + stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK); /* map stub code + data */ res = stub_syscall6(STUB_MMAP_NR, @@ -59,22 +68,148 @@ noinline static void real_init(void) if (res != init_data.stub_start + UM_KERN_PAGE_SIZE) stub_syscall1(__NR_exit, 12); + /* In SECCOMP mode, we only need the signalling FD from now on */ + if (init_data.seccomp) { + res = stub_syscall3(__NR_close_range, 1, ~0U, 0); + if (res != 0) + stub_syscall1(__NR_exit, 13); + } + /* setup signal stack inside stub data */ stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); - /* register SIGSEGV handler */ - sa.sa_handler_ = (void *) init_data.segv_handler; - res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, - sizeof(sa.sa_mask)); - if (res != 0) - stub_syscall1(__NR_exit, 13); + /* register signal handlers */ + sa.sa_handler_ = (void *) init_data.signal_handler; + sa.sa_restorer = (void *) init_data.signal_restorer; + if (!init_data.seccomp) { + /* In ptrace mode, the SIGSEGV handler never returns */ + sa.sa_mask = 0; + + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 14); + } else { + /* SECCOMP mode uses rt_sigreturn, need to mask all signals */ + sa.sa_mask = ~0ULL; + + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 15); + + res = stub_syscall4(__NR_rt_sigaction, SIGSYS, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 16); + + res = stub_syscall4(__NR_rt_sigaction, SIGALRM, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 17); + + res = stub_syscall4(__NR_rt_sigaction, SIGTRAP, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 18); + + res = stub_syscall4(__NR_rt_sigaction, SIGILL, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 19); + + res = stub_syscall4(__NR_rt_sigaction, SIGFPE, + (unsigned long)&sa, 0, sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 20); + } + + /* + * If in seccomp mode, install the SECCOMP filter and trigger a syscall. + * Otherwise set PTRACE_TRACEME and do a SIGSTOP. + */ + if (init_data.seccomp) { + struct sock_filter filter[] = { +#if __BITS_PER_LONG > 32 + /* [0] Load upper 32bit of instruction pointer from seccomp_data */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + (offsetof(struct seccomp_data, instruction_pointer) + 4)), + + /* [1] Jump forward 3 instructions if the upper address is not identical */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3), +#endif + /* [2] Load lower 32bit of instruction pointer from seccomp_data */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + (offsetof(struct seccomp_data, instruction_pointer))), + + /* [3] Mask out lower bits */ + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000), + + /* [4] Jump to [6] if the lower bits are not on the expected page */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0), + + /* [5] Trap call, allow */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), + + /* [6,7] Check architecture */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + offsetof(struct seccomp_data, arch)), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, + UM_SECCOMP_ARCH_NATIVE, 1, 0), + + /* [8] Kill (for architecture check) */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), + + /* [9] Load syscall number */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + offsetof(struct seccomp_data, nr)), + + /* [10-16] Check against permitted syscalls */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, + 7, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg, + 6, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close, + 5, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR, + 4, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, + 3, 0), +#ifdef __i386__ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area, + 2, 0), +#else + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl, + 2, 0), +#endif + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, + 1, 0), + + /* [17] Not one of the permitted syscalls */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), + + /* [18] Permitted call for the stub */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = sizeof(filter) / sizeof(filter[0]), + .filter = filter, + }; + + if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_TSYNC, + (unsigned long)&prog) != 0) + stub_syscall1(__NR_exit, 21); - stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + /* Fall through, the exit syscall will cause SIGSYS */ + } else { + stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); - stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); + stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); + } - stub_syscall1(__NR_exit, 14); + stub_syscall1(__NR_exit, 30); __builtin_unreachable(); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 1394568c0210..ae0fa2173778 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -856,11 +856,16 @@ static struct clock_event_device timer_clockevent = { static irqreturn_t um_timer(int irq, void *dev) { - if (get_current()->mm != NULL) - { - /* userspace - relay signal, results in correct userspace timers */ + /* + * Interrupt the (possibly) running userspace process, technically this + * should only happen if userspace is currently executing. + * With infinite CPU time-travel, we can only get here when userspace + * is not executing. Do not notify there and avoid spurious scheduling. + */ + if (time_travel_mode != TT_MODE_INFCPU && + time_travel_mode != TT_MODE_EXTERNAL && + get_current()->mm) os_alarm_process(get_current()->mm->context.id.pid); - } (*timer_clockevent.event_handler)(&timer_clockevent); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ef2272e92a43..5b80a3a89c20 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -16,7 +16,122 @@ #include <kern_util.h> #include <os.h> #include <skas.h> -#include <arch.h> + +/* + * NOTE: UML does not have exception tables. As such, this is almost a copy + * of the code in mm/memory.c, only adjusting the logic to simply check whether + * we are coming from the kernel instead of doing an additional lookup in the + * exception table. + * We can do this simplification because we never get here if the exception was + * fixable. + */ +static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user) +{ + if (likely(mmap_read_trylock(mm))) + return true; + + if (!is_user) + return false; + + return !mmap_read_lock_killable(mm); +} + +static inline bool mmap_upgrade_trylock(struct mm_struct *mm) +{ + /* + * We don't have this operation yet. + * + * It should be easy enough to do: it's basically a + * atomic_long_try_cmpxchg_acquire() + * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but + * it also needs the proper lockdep magic etc. + */ + return false; +} + +static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user) +{ + mmap_read_unlock(mm); + if (!is_user) + return false; + + return !mmap_write_lock_killable(mm); +} + +/* + * Helper for page fault handling. + * + * This is kind of equivalend to "mmap_read_lock()" followed + * by "find_extend_vma()", except it's a lot more careful about + * the locking (and will drop the lock on failure). + * + * For example, if we have a kernel bug that causes a page + * fault, we don't want to just use mmap_read_lock() to get + * the mm lock, because that would deadlock if the bug were + * to happen while we're holding the mm lock for writing. + * + * So this checks the exception tables on kernel faults in + * order to only do this all for instructions that are actually + * expected to fault. + * + * We can also actually take the mm lock for writing if we + * need to extend the vma, which helps the VM layer a lot. + */ +static struct vm_area_struct * +um_lock_mm_and_find_vma(struct mm_struct *mm, + unsigned long addr, bool is_user) +{ + struct vm_area_struct *vma; + + if (!get_mmap_lock_carefully(mm, is_user)) + return NULL; + + vma = find_vma(mm, addr); + if (likely(vma && (vma->vm_start <= addr))) + return vma; + + /* + * Well, dang. We might still be successful, but only + * if we can extend a vma to do so. + */ + if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { + mmap_read_unlock(mm); + return NULL; + } + + /* + * We can try to upgrade the mmap lock atomically, + * in which case we can continue to use the vma + * we already looked up. + * + * Otherwise we'll have to drop the mmap lock and + * re-take it, and also look up the vma again, + * re-checking it. + */ + if (!mmap_upgrade_trylock(mm)) { + if (!upgrade_mmap_lock_carefully(mm, is_user)) + return NULL; + + vma = find_vma(mm, addr); + if (!vma) + goto fail; + if (vma->vm_start <= addr) + goto success; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto fail; + } + + if (expand_stack_locked(vma, addr)) + goto fail; + +success: + mmap_write_downgrade(mm); + return vma; + +fail: + mmap_write_unlock(mm); + return NULL; +} /* * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by @@ -44,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip, if (is_user) flags |= FAULT_FLAG_USER; retry: - mmap_read_lock(mm); - vma = find_vma(mm, address); - if (!vma) - goto out; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto out; - if (is_user && !ARCH_IS_STACKGROW(address)) - goto out; - vma = expand_stack(mm, address); + vma = um_lock_mm_and_find_vma(mm, address, is_user); if (!vma) goto out_nosemaphore; -good_area: *code_out = SEGV_ACCERR; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 049dfa5bc9c6..fae836713487 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -8,7 +8,7 @@ KCOV_INSTRUMENT := n obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ registers.o sigio.o signal.o start_up.o time.o tty.o \ - umid.o user_syms.o util.o drivers/ skas/ + umid.o user_syms.o util.o skas/ CFLAGS_signal.o += -Wframe-larger-than=4096 diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile deleted file mode 100644 index cf2d75bb1884..000000000000 --- a/arch/um/os-Linux/drivers/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -# - -ethertap-objs := ethertap_kern.o ethertap_user.o -tuntap-objs := tuntap_kern.o tuntap_user.o - -obj-y = -obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o -obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o - -include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h deleted file mode 100644 index a475259f90e1..000000000000 --- a/arch/um/os-Linux/drivers/etap.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __DRIVERS_ETAP_H -#define __DRIVERS_ETAP_H - -#include <net_user.h> - -struct ethertap_data { - char *dev_name; - char *gate_addr; - int data_fd; - int control_fd; - void *dev; -}; - -extern const struct net_user_info ethertap_user_info; - -#endif diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c deleted file mode 100644 index 5e5ee40680ce..000000000000 --- a/arch/um/os-Linux/drivers/ethertap_kern.c +++ /dev/null @@ -1,100 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include "etap.h" -#include <net_kern.h> - -struct ethertap_init { - char *dev_name; - char *gate_addr; -}; - -static void etap_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct ethertap_data *epri; - struct ethertap_init *init = data; - - pri = netdev_priv(dev); - epri = (struct ethertap_data *) pri->user; - epri->dev_name = init->dev_name; - epri->gate_addr = init->gate_addr; - epri->data_fd = -1; - epri->control_fd = -1; - epri->dev = dev; - - printk(KERN_INFO "ethertap backend - %s", epri->dev_name); - if (epri->gate_addr != NULL) - printk(KERN_CONT ", IP = %s", epri->gate_addr); - printk(KERN_CONT "\n"); -} - -static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - int len; - - len = net_recvfrom(fd, skb_mac_header(skb), - skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP); - if (len <= 0) - return(len); - - skb_pull(skb, 2); - len -= 2; - return len; -} - -static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - skb_push(skb, 2); - return net_send(fd, skb->data, skb->len); -} - -const struct net_kern_info ethertap_kern_info = { - .init = etap_init, - .protocol = eth_protocol, - .read = etap_read, - .write = etap_write, -}; - -static int ethertap_setup(char *str, char **mac_out, void *data) -{ - struct ethertap_init *init = data; - - *init = ((struct ethertap_init) - { .dev_name = NULL, - .gate_addr = NULL }); - if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out, - &init->gate_addr)) - return 0; - if (init->dev_name == NULL) { - printk(KERN_ERR "ethertap_setup : Missing tap device name\n"); - return 0; - } - - return 1; -} - -static struct transport ethertap_transport = { - .list = LIST_HEAD_INIT(ethertap_transport.list), - .name = "ethertap", - .setup = ethertap_setup, - .user = ðertap_user_info, - .kern = ðertap_kern_info, - .private_size = sizeof(struct ethertap_data), - .setup_size = sizeof(struct ethertap_init), -}; - -static int register_ethertap(void) -{ - register_transport(ðertap_transport); - return 0; -} - -late_initcall(register_ethertap); diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c deleted file mode 100644 index bdf215c0eca7..000000000000 --- a/arch/um/os-Linux/drivers/ethertap_user.c +++ /dev/null @@ -1,248 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <sys/socket.h> -#include <sys/wait.h> -#include "etap.h" -#include <os.h> -#include <net_user.h> -#include <um_malloc.h> - -#define MAX_PACKET ETH_MAX_PACKET - -static int etap_user_init(void *data, void *dev) -{ - struct ethertap_data *pri = data; - - pri->dev = dev; - return 0; -} - -struct addr_change { - enum { ADD_ADDR, DEL_ADDR } what; - unsigned char addr[4]; - unsigned char netmask[4]; -}; - -static void etap_change(int op, unsigned char *addr, unsigned char *netmask, - int fd) -{ - struct addr_change change; - char *output; - int n; - - change.what = op; - memcpy(change.addr, addr, sizeof(change.addr)); - memcpy(change.netmask, netmask, sizeof(change.netmask)); - CATCH_EINTR(n = write(fd, &change, sizeof(change))); - if (n != sizeof(change)) { - printk(UM_KERN_ERR "etap_change - request failed, err = %d\n", - errno); - return; - } - - output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL); - if (output == NULL) - printk(UM_KERN_ERR "etap_change : Failed to allocate output " - "buffer\n"); - read_output(fd, output, UM_KERN_PAGE_SIZE); - if (output != NULL) { - printk("%s", output); - kfree(output); - } -} - -static void etap_open_addr(unsigned char *addr, unsigned char *netmask, - void *arg) -{ - etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); -} - -static void etap_close_addr(unsigned char *addr, unsigned char *netmask, - void *arg) -{ - etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); -} - -struct etap_pre_exec_data { - int control_remote; - int control_me; - int data_me; -}; - -static void etap_pre_exec(void *arg) -{ - struct etap_pre_exec_data *data = arg; - - dup2(data->control_remote, 1); - close(data->data_me); - close(data->control_me); -} - -static int etap_tramp(char *dev, char *gate, int control_me, - int control_remote, int data_me, int data_remote) -{ - struct etap_pre_exec_data pe_data; - int pid, err, n; - char version_buf[sizeof("nnnnn\0")]; - char data_fd_buf[sizeof("nnnnnn\0")]; - char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; - char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, - data_fd_buf, gate_buf, NULL }; - char *nosetup_args[] = { "uml_net", version_buf, "ethertap", - dev, data_fd_buf, NULL }; - char **args, c; - - sprintf(data_fd_buf, "%d", data_remote); - sprintf(version_buf, "%d", UML_NET_VERSION); - if (gate != NULL) { - strscpy(gate_buf, gate); - args = setup_args; - } - else args = nosetup_args; - - err = 0; - pe_data.control_remote = control_remote; - pe_data.control_me = control_me; - pe_data.data_me = data_me; - pid = run_helper(etap_pre_exec, &pe_data, args); - - if (pid < 0) - err = pid; - close(data_remote); - close(control_remote); - CATCH_EINTR(n = read(control_me, &c, sizeof(c))); - if (n != sizeof(c)) { - err = -errno; - printk(UM_KERN_ERR "etap_tramp : read of status failed, " - "err = %d\n", -err); - return err; - } - if (c != 1) { - printk(UM_KERN_ERR "etap_tramp : uml_net failed\n"); - err = helper_wait(pid); - } - return err; -} - -static int etap_open(void *data) -{ - struct ethertap_data *pri = data; - char *output; - int data_fds[2], control_fds[2], err, output_len; - - err = tap_open_common(pri->dev, pri->gate_addr); - if (err) - return err; - - err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "etap_open - data socketpair failed - " - "err = %d\n", errno); - return err; - } - - err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "etap_open - control socketpair failed - " - "err = %d\n", errno); - goto out_close_data; - } - - err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], - control_fds[1], data_fds[0], data_fds[1]); - output_len = UM_KERN_PAGE_SIZE; - output = uml_kmalloc(output_len, UM_GFP_KERNEL); - read_output(control_fds[0], output, output_len); - - if (output == NULL) - printk(UM_KERN_ERR "etap_open : failed to allocate output " - "buffer\n"); - else { - printk("%s", output); - kfree(output); - } - - if (err < 0) { - printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err); - goto out_close_control; - } - - pri->data_fd = data_fds[0]; - pri->control_fd = control_fds[0]; - iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); - return data_fds[0]; - -out_close_control: - close(control_fds[0]); - close(control_fds[1]); -out_close_data: - close(data_fds[0]); - close(data_fds[1]); - return err; -} - -static void etap_close(int fd, void *data) -{ - struct ethertap_data *pri = data; - - iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); - close(fd); - - if (shutdown(pri->data_fd, SHUT_RDWR) < 0) - printk(UM_KERN_ERR "etap_close - shutdown data socket failed, " - "errno = %d\n", errno); - - if (shutdown(pri->control_fd, SHUT_RDWR) < 0) - printk(UM_KERN_ERR "etap_close - shutdown control socket " - "failed, errno = %d\n", errno); - - close(pri->data_fd); - pri->data_fd = -1; - close(pri->control_fd); - pri->control_fd = -1; -} - -static void etap_add_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct ethertap_data *pri = data; - - tap_check_ips(pri->gate_addr, addr); - if (pri->control_fd == -1) - return; - etap_open_addr(addr, netmask, &pri->control_fd); -} - -static void etap_del_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct ethertap_data *pri = data; - - if (pri->control_fd == -1) - return; - - etap_close_addr(addr, netmask, &pri->control_fd); -} - -const struct net_user_info ethertap_user_info = { - .init = etap_user_init, - .open = etap_open, - .close = etap_close, - .remove = NULL, - .add_address = etap_add_addr, - .delete_address = etap_del_addr, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP, -}; diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h deleted file mode 100644 index e364e42abfc5..000000000000 --- a/arch/um/os-Linux/drivers/tuntap.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __UM_TUNTAP_H -#define __UM_TUNTAP_H - -#include <net_user.h> - -struct tuntap_data { - char *dev_name; - int fixed_config; - char *gate_addr; - int fd; - void *dev; -}; - -extern const struct net_user_info tuntap_user_info; - -#endif diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c deleted file mode 100644 index ff022d9cf0dd..000000000000 --- a/arch/um/os-Linux/drivers/tuntap_kern.c +++ /dev/null @@ -1,86 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <linux/netdevice.h> -#include <linux/init.h> -#include <linux/skbuff.h> -#include <asm/errno.h> -#include <net_kern.h> -#include "tuntap.h" - -struct tuntap_init { - char *dev_name; - char *gate_addr; -}; - -static void tuntap_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct tuntap_data *tpri; - struct tuntap_init *init = data; - - pri = netdev_priv(dev); - tpri = (struct tuntap_data *) pri->user; - tpri->dev_name = init->dev_name; - tpri->fixed_config = (init->dev_name != NULL); - tpri->gate_addr = init->gate_addr; - tpri->fd = -1; - tpri->dev = dev; - - printk(KERN_INFO "TUN/TAP backend - "); - if (tpri->gate_addr != NULL) - printk(KERN_CONT "IP = %s", tpri->gate_addr); - printk(KERN_CONT "\n"); -} - -static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_read(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); -} - -static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_write(fd, skb->data, skb->len); -} - -const struct net_kern_info tuntap_kern_info = { - .init = tuntap_init, - .protocol = eth_protocol, - .read = tuntap_read, - .write = tuntap_write, -}; - -static int tuntap_setup(char *str, char **mac_out, void *data) -{ - struct tuntap_init *init = data; - - *init = ((struct tuntap_init) - { .dev_name = NULL, - .gate_addr = NULL }); - if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out, - &init->gate_addr)) - return 0; - - return 1; -} - -static struct transport tuntap_transport = { - .list = LIST_HEAD_INIT(tuntap_transport.list), - .name = "tuntap", - .setup = tuntap_setup, - .user = &tuntap_user_info, - .kern = &tuntap_kern_info, - .private_size = sizeof(struct tuntap_data), - .setup_size = sizeof(struct tuntap_init), -}; - -static int register_tuntap(void) -{ - register_transport(&tuntap_transport); - return 0; -} - -late_initcall(register_tuntap); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c deleted file mode 100644 index 91f0e27ca3a6..000000000000 --- a/arch/um/os-Linux/drivers/tuntap_user.c +++ /dev/null @@ -1,215 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <linux/if_tun.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/wait.h> -#include <sys/uio.h> -#include <kern_util.h> -#include <os.h> -#include "tuntap.h" - -static int tuntap_user_init(void *data, void *dev) -{ - struct tuntap_data *pri = data; - - pri->dev = dev; - return 0; -} - -static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct tuntap_data *pri = data; - - tap_check_ips(pri->gate_addr, addr); - if ((pri->fd == -1) || pri->fixed_config) - return; - open_addr(addr, netmask, pri->dev_name); -} - -static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct tuntap_data *pri = data; - - if ((pri->fd == -1) || pri->fixed_config) - return; - close_addr(addr, netmask, pri->dev_name); -} - -struct tuntap_pre_exec_data { - int stdout_fd; - int close_me; -}; - -static void tuntap_pre_exec(void *arg) -{ - struct tuntap_pre_exec_data *data = arg; - - dup2(data->stdout_fd, 1); - close(data->close_me); -} - -static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, - char *buffer, int buffer_len, int *used_out) -{ - struct tuntap_pre_exec_data data; - char version_buf[sizeof("nnnnn\0")]; - char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, - NULL }; - char buf[CMSG_SPACE(sizeof(*fd_out))]; - struct msghdr msg; - struct cmsghdr *cmsg; - struct iovec iov; - int pid, n, err; - - sprintf(version_buf, "%d", UML_NET_VERSION); - - data.stdout_fd = remote; - data.close_me = me; - - pid = run_helper(tuntap_pre_exec, &data, argv); - - if (pid < 0) - return pid; - - close(remote); - - msg.msg_name = NULL; - msg.msg_namelen = 0; - if (buffer != NULL) { - iov = ((struct iovec) { buffer, buffer_len }); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - } - else { - msg.msg_iov = NULL; - msg.msg_iovlen = 0; - } - msg.msg_control = buf; - msg.msg_controllen = sizeof(buf); - msg.msg_flags = 0; - n = recvmsg(me, &msg, 0); - *used_out = n; - if (n < 0) { - err = -errno; - printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - " - "errno = %d\n", errno); - return err; - } - helper_wait(pid); - - cmsg = CMSG_FIRSTHDR(&msg); - if (cmsg == NULL) { - printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " - "message\n"); - return -EINVAL; - } - if ((cmsg->cmsg_level != SOL_SOCKET) || - (cmsg->cmsg_type != SCM_RIGHTS)) { - printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " - "descriptor\n"); - return -EINVAL; - } - *fd_out = ((int *) CMSG_DATA(cmsg))[0]; - os_set_exec_close(*fd_out); - return 0; -} - -static int tuntap_open(void *data) -{ - struct ifreq ifr; - struct tuntap_data *pri = data; - char *output, *buffer; - int err, fds[2], len, used; - - err = tap_open_common(pri->dev, pri->gate_addr); - if (err < 0) - return err; - - if (pri->fixed_config) { - pri->fd = os_open_file("/dev/net/tun", - of_cloexec(of_rdwr(OPENFLAGS())), 0); - if (pri->fd < 0) { - printk(UM_KERN_ERR "Failed to open /dev/net/tun, " - "err = %d\n", -pri->fd); - return pri->fd; - } - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strscpy(ifr.ifr_name, pri->dev_name); - if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { - err = -errno; - printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", - errno); - close(pri->fd); - return err; - } - } - else { - err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "tuntap_open : socketpair failed - " - "errno = %d\n", errno); - return err; - } - - buffer = get_output_buffer(&len); - if (buffer != NULL) - len--; - used = 0; - - err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], - fds[1], buffer, len, &used); - - output = buffer; - if (err < 0) { - printk("%s", output); - free_output_buffer(buffer); - printk(UM_KERN_ERR "tuntap_open_tramp failed - " - "err = %d\n", -err); - return err; - } - - pri->dev_name = uml_strdup(buffer); - output += IFNAMSIZ; - printk("%s", output); - free_output_buffer(buffer); - - close(fds[0]); - iter_addresses(pri->dev, open_addr, pri->dev_name); - } - - return pri->fd; -} - -static void tuntap_close(int fd, void *data) -{ - struct tuntap_data *pri = data; - - if (!pri->fixed_config) - iter_addresses(pri->dev, close_addr, pri->dev_name); - close(fd); - pri->fd = -1; -} - -const struct net_user_info tuntap_user_info = { - .init = tuntap_user_init, - .open = tuntap_open, - .close = tuntap_close, - .remove = NULL, - .add_address = tuntap_add_addr, - .delete_address = tuntap_del_addr, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index a0d01c68ce3e..617886d1fb1e 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -106,21 +106,6 @@ int os_get_ifname(int fd, char* namebuf) return 0; } -int os_set_slip(int fd) -{ - int disc, sencap; - - disc = N_SLIP; - if (ioctl(fd, TIOCSETD, &disc) < 0) - return -errno; - - sencap = 0; - if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0) - return -errno; - - return 0; -} - int os_mode_fd(int fd, int mode) { int err; diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h index 317fca190c2b..5d8d3b0817a9 100644 --- a/arch/um/os-Linux/internal.h +++ b/arch/um/os-Linux/internal.h @@ -2,6 +2,9 @@ #ifndef __UM_OS_LINUX_INTERNAL_H #define __UM_OS_LINUX_INTERNAL_H +#include <mm_id.h> +#include <stub-data.h> + /* * elf_aux.c */ @@ -16,5 +19,5 @@ void check_tmpexec(void); * skas/process.c */ void wait_stub_done(int pid); - +void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); #endif /* __UM_OS_LINUX_INTERNAL_H */ diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 184566edeee9..00b49e90d05f 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -18,17 +18,29 @@ #include <init.h> #include <longjmp.h> #include <os.h> +#include <skas/skas.h> void os_alarm_process(int pid) { + if (pid <= 0) + return; + kill(pid, SIGALRM); } void os_kill_process(int pid, int reap_child) { + if (pid <= 0) + return; + + /* Block signals until child is reaped */ + block_signals(); + kill(pid, SIGKILL); if (reap_child) CATCH_EINTR(waitpid(pid, NULL, __WALL)); + + unblock_signals(); } /* Kill off a ptraced child by all means available. kill it normally first, @@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child) void os_kill_ptraced_process(int pid, int reap_child) { + if (pid <= 0) + return; + + /* Block signals until child is reaped */ + block_signals(); + kill(pid, SIGKILL); ptrace(PTRACE_KILL, pid); ptrace(PTRACE_CONT, pid); if (reap_child) CATCH_EINTR(waitpid(pid, NULL, __WALL)); + + unblock_signals(); +} + +pid_t os_reap_child(void) +{ + int status; + + /* Try to reap a child */ + return waitpid(-1, &status, WNOHANG); } /* Don't use the glibc version, which caches the result in TLS. It misses some @@ -151,6 +179,9 @@ void init_new_thread_signals(void) set_handler(SIGBUS); signal(SIGHUP, SIG_IGN); set_handler(SIGIO); + /* We (currently) only use the child reaper IRQ in seccomp mode */ + if (using_seccomp) + set_handler(SIGCHLD); signal(SIGWINCH, SIG_IGN); } diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index d7ca148807b2..bfba2cbc9478 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -14,8 +14,8 @@ /* This is set once at boot time and not changed thereafter */ -static unsigned long exec_regs[MAX_REG_NR]; -static unsigned long *exec_fp_regs; +unsigned long exec_regs[MAX_REG_NR]; +unsigned long *exec_fp_regs; int init_pid_registers(int pid) { diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index a05a6ecee756..6de145f8fe3d 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -12,6 +12,7 @@ #include <signal.h> #include <string.h> #include <sys/epoll.h> +#include <asm/unistd.h> #include <kern_util.h> #include <init.h> #include <os.h> @@ -46,7 +47,7 @@ static void *write_sigio_thread(void *unused) __func__, errno); } - CATCH_EINTR(r = tgkill(pid, pid, SIGIO)); + CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO)); if (r < 0) printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n", __func__, errno); diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index e71e5b4878d1..11f07f498270 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = [SIGBUS] = relay_signal, [SIGSEGV] = segv_handler, [SIGIO] = sigio_handler, + [SIGCHLD] = sigchld_handler, }; static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) @@ -44,7 +45,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) } /* enable signals if sig isn't IRQ signal */ - if ((sig != SIGIO) && (sig != SIGWINCH)) + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD)) unblock_signals_trace(); (*sig_info[sig])(sig, si, &r, mc); @@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGALRM_BIT 1 #define SIGALRM_MASK (1 << SIGALRM_BIT) +#define SIGCHLD_BIT 2 +#define SIGCHLD_MASK (1 << SIGCHLD_BIT) + int signals_enabled; #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; @@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) return; } + if (!enabled && (sig == SIGCHLD)) { + signals_pending |= SIGCHLD_MASK; + return; + } + block_signals_trace(); sig_handler_common(sig, si, mc); @@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { [SIGIO] = sig_handler, [SIGWINCH] = sig_handler, + /* SIGCHLD is only actually registered in seccomp mode. */ + [SIGCHLD] = sig_handler, [SIGALRM] = timer_alarm_handler, [SIGUSR1] = sigusr1_handler, @@ -309,6 +320,12 @@ void unblock_signals(void) if (save_pending & SIGIO_MASK) sig_handler_common(SIGIO, NULL, NULL); + if (save_pending & SIGCHLD_MASK) { + struct uml_pt_regs regs = {}; + + sigchld_handler(SIGCHLD, NULL, ®s, NULL); + } + /* Do not reenter the handler */ if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK))) diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index d7f1814b0e5a..8b9921ac3ef8 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -43,6 +43,16 @@ void syscall_stub_dump_error(struct mm_id *mm_idp) print_hex_dump(UM_KERN_ERR, " syscall data: ", 0, 16, 4, sc, sizeof(*sc), 0); + + if (using_seccomp) { + printk(UM_KERN_ERR "%s: FD map num: %d", __func__, + mm_idp->syscall_fd_num); + print_hex_dump(UM_KERN_ERR, + " FD map: ", 0, 16, + sizeof(mm_idp->syscall_fd_map[0]), + mm_idp->syscall_fd_map, + sizeof(mm_idp->syscall_fd_map), 0); + } } static inline unsigned long *check_init_stack(struct mm_id * mm_idp, @@ -80,27 +90,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) int n, i; int err, pid = mm_idp->pid; - n = ptrace_setregs(pid, syscall_regs); - if (n < 0) { - printk(UM_KERN_ERR "Registers - \n"); - for (i = 0; i < MAX_REG_NR; i++) - printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); - panic("%s : PTRACE_SETREGS failed, errno = %d\n", - __func__, -n); - } - /* Inform process how much we have filled in. */ proc_data->syscall_data_len = mm_idp->syscall_data_len; - err = ptrace(PTRACE_CONT, pid, 0, 0); - if (err) - panic("Failed to continue stub, pid = %d, errno = %d\n", pid, - errno); - - wait_stub_done(pid); + if (using_seccomp) { + proc_data->restart_wait = 1; + wait_stub_done_seccomp(mm_idp, 0, 1); + } else { + n = ptrace_setregs(pid, syscall_regs); + if (n < 0) { + printk(UM_KERN_ERR "Registers -\n"); + for (i = 0; i < MAX_REG_NR; i++) + printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); + panic("%s : PTRACE_SETREGS failed, errno = %d\n", + __func__, -n); + } + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) + panic("Failed to continue stub, pid = %d, errno = %d\n", + pid, errno); + + wait_stub_done(pid); + } /* - * proc_data->err will be non-zero if there was an (unexpected) error. + * proc_data->err will be negative if there was an (unexpected) error. * In that case, syscall_data_len points to the last executed syscall, * otherwise it will be zero (but we do not need to rely on that). */ @@ -113,6 +128,9 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) mm_idp->syscall_data_len = 0; } + if (using_seccomp) + mm_idp->syscall_fd_num = 0; + return mm_idp->syscall_data_len; } @@ -175,6 +193,44 @@ static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp, return NULL; } +static int get_stub_fd(struct mm_id *mm_idp, int fd) +{ + int i; + + /* Find an FD slot (or flush and use first) */ + if (!using_seccomp) + return fd; + + /* Already crashed, value does not matter */ + if (mm_idp->syscall_data_len < 0) + return 0; + + /* Find existing FD in map if we can allocate another syscall */ + if (mm_idp->syscall_data_len < + ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) { + for (i = 0; i < mm_idp->syscall_fd_num; i++) { + if (mm_idp->syscall_fd_map[i] == fd) + return i; + } + + if (mm_idp->syscall_fd_num < STUB_MAX_FDS) { + i = mm_idp->syscall_fd_num; + mm_idp->syscall_fd_map[i] = fd; + + mm_idp->syscall_fd_num++; + + return i; + } + } + + /* FD map full or no syscall space available, continue after flush */ + do_syscall_stub(mm_idp); + mm_idp->syscall_fd_map[0] = fd; + mm_idp->syscall_fd_num = 1; + + return 0; +} + int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, int phys_fd, unsigned long long offset) { @@ -182,12 +238,21 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, /* Compress with previous syscall if that is possible */ sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt); - if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd && + if (sc && sc->mem.prot == prot && sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) { - sc->mem.length += len; - return 0; + int prev_fd = sc->mem.fd; + + if (using_seccomp) + prev_fd = mm_idp->syscall_fd_map[sc->mem.fd]; + + if (phys_fd == prev_fd) { + sc->mem.length += len; + return 0; + } } + phys_fd = get_stub_fd(mm_idp, phys_fd); + sc = syscall_stub_alloc(mm_idp); sc->syscall = STUB_SYSCALL_MMAP; sc->mem.addr = virt; diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index ae2aea062f06..e42ffac23e3c 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ @@ -15,6 +16,7 @@ #include <sys/mman.h> #include <sys/wait.h> #include <sys/stat.h> +#include <sys/socket.h> #include <asm/unistd.h> #include <as-layout.h> #include <init.h> @@ -25,8 +27,11 @@ #include <registers.h> #include <skas.h> #include <sysdep/stub.h> +#include <sysdep/mcontext.h> +#include <linux/futex.h> #include <linux/threads.h> #include <timetravel.h> +#include <asm-generic/rwonce.h> #include "../internal.h" int is_skas_winch(int pid, int fd, void *data) @@ -142,6 +147,105 @@ bad_wait: fatal_sigsegv(); } +void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys) +{ + struct stub_data *data = (void *)mm_idp->stack; + int ret; + + do { + const char byte = 0; + struct iovec iov = { + .iov_base = (void *)&byte, + .iov_len = sizeof(byte), + }; + union { + char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))]; + struct cmsghdr align; + } ctrl; + struct msghdr msgh = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + if (!running) { + if (mm_idp->syscall_fd_num) { + unsigned int fds_size = + sizeof(int) * mm_idp->syscall_fd_num; + struct cmsghdr *cmsg; + + msgh.msg_control = ctrl.data; + msgh.msg_controllen = CMSG_SPACE(fds_size); + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(fds_size); + memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map, + fds_size); + + CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock, + &msgh, 0)); + } + + data->signal = 0; + data->futex = FUTEX_IN_CHILD; + CATCH_EINTR(syscall(__NR_futex, &data->futex, + FUTEX_WAKE, 1, NULL, NULL, 0)); + } + + do { + /* + * We need to check whether the child is still alive + * before and after the FUTEX_WAIT call. Before, in + * case it just died but we still updated data->futex + * to FUTEX_IN_CHILD. And after, in case it died while + * we were waiting (and SIGCHLD woke us up, see the + * IRQ handler in mmu.c). + * + * Either way, if PID is negative, then we have no + * choice but to kill the task. + */ + if (__READ_ONCE(mm_idp->pid) < 0) + goto out_kill; + + ret = syscall(__NR_futex, &data->futex, + FUTEX_WAIT, FUTEX_IN_CHILD, + NULL, NULL, 0); + if (ret < 0 && errno != EINTR && errno != EAGAIN) { + printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + } while (data->futex == FUTEX_IN_CHILD); + + if (__READ_ONCE(mm_idp->pid) < 0) + goto out_kill; + + running = 0; + + /* We may receive a SIGALRM before SIGSYS, iterate again. */ + } while (wait_sigsys && data->signal == SIGALRM); + + if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) { + printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__); + goto out_kill; + } + + if (wait_sigsys && data->signal != SIGSYS) { + printk(UM_KERN_ERR "%s : expected SIGSYS but got %d", + __func__, data->signal); + goto out_kill; + } + + return; + +out_kill: + printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n", + __func__, mm_idp->pid, errno); + /* This is not true inside start_userspace */ + if (current_mm_id() == mm_idp) + fatal_sigsegv(); +} + extern unsigned long current_stub_stack(void); static void get_skas_faultinfo(int pid, struct faultinfo *fi) @@ -163,12 +267,6 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi) memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); } -static void handle_segv(int pid, struct uml_pt_regs *regs) -{ - get_skas_faultinfo(pid, ®s->faultinfo); - segv(regs->faultinfo, 0, 1, NULL, NULL); -} - static void handle_trap(int pid, struct uml_pt_regs *regs) { if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) @@ -181,29 +279,48 @@ extern char __syscall_stub_start[]; static int stub_exe_fd; +struct tramp_data { + struct stub_data *stub_data; + /* 0 is inherited, 1 is the kernel side */ + int sockpair[2]; +}; + #ifndef CLOSE_RANGE_CLOEXEC #define CLOSE_RANGE_CLOEXEC (1U << 2) #endif -static int userspace_tramp(void *stack) +static int userspace_tramp(void *data) { + struct tramp_data *tramp_data = data; char *const argv[] = { "uml-userspace", NULL }; - int pipe_fds[2]; unsigned long long offset; struct stub_init_data init_data = { + .seccomp = using_seccomp, .stub_start = STUB_START, - .segv_handler = STUB_CODE + - (unsigned long) stub_segv_handler - - (unsigned long) __syscall_stub_start, }; struct iomem_region *iomem; int ret; + if (using_seccomp) { + init_data.signal_handler = STUB_CODE + + (unsigned long) stub_signal_interrupt - + (unsigned long) __syscall_stub_start; + init_data.signal_restorer = STUB_CODE + + (unsigned long) stub_signal_restorer - + (unsigned long) __syscall_stub_start; + } else { + init_data.signal_handler = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) __syscall_stub_start; + init_data.signal_restorer = 0; + } + init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); init_data.stub_code_offset = MMAP_OFFSET(offset); - init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); + init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data), + &offset); init_data.stub_data_offset = MMAP_OFFSET(offset); /* @@ -214,20 +331,21 @@ static int userspace_tramp(void *stack) syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC); fcntl(init_data.stub_data_fd, F_SETFD, 0); - for (iomem = iomem_regions; iomem; iomem = iomem->next) - fcntl(iomem->fd, F_SETFD, 0); - /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */ - if (pipe(pipe_fds)) - exit(2); + /* In SECCOMP mode, these FDs are passed when needed */ + if (!using_seccomp) { + for (iomem = iomem_regions; iomem; iomem = iomem->next) + fcntl(iomem->fd, F_SETFD, 0); + } - if (dup2(pipe_fds[0], 0) < 0) + /* dup2 signaling FD/socket to STDIN */ + if (dup2(tramp_data->sockpair[0], 0) < 0) exit(3); - close(pipe_fds[0]); + close(tramp_data->sockpair[0]); /* Write init_data and close write side */ - ret = write(pipe_fds[1], &init_data, sizeof(init_data)); - close(pipe_fds[1]); + ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data)); + close(tramp_data->sockpair[1]); if (ret != sizeof(init_data)) exit(4); @@ -315,11 +433,12 @@ static int __init init_stub_exe_fd(void) } __initcall(init_stub_exe_fd); +int using_seccomp; int userspace_pid[NR_CPUS]; /** * start_userspace() - prepare a new userspace process - * @stub_stack: pointer to the stub stack. + * @mm_id: The corresponding struct mm_id * * Setups a new temporary stack page that is used while userspace_tramp() runs * Clones the kernel process into a new userspace process, with FDs only. @@ -328,11 +447,15 @@ int userspace_pid[NR_CPUS]; * when negative: an error number. * FIXME: can PIDs become negative?! */ -int start_userspace(unsigned long stub_stack) +int start_userspace(struct mm_id *mm_id) { + struct stub_data *proc_data = (void *)mm_id->stack; + struct tramp_data tramp_data = { + .stub_data = proc_data, + }; void *stack; unsigned long sp; - int pid, status, n, err; + int status, n, err; /* setup a temporary stack page */ stack = mmap(NULL, UM_KERN_PAGE_SIZE, @@ -348,40 +471,55 @@ int start_userspace(unsigned long stub_stack) /* set stack pointer to the end of the stack page, so it can grow downwards */ sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; - /* clone into new userspace process */ - pid = clone(userspace_tramp, (void *) sp, + /* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */ + if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) { + err = -errno; + printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n", + __func__, errno); + return err; + } + + if (using_seccomp) + proc_data->futex = FUTEX_IN_CHILD; + + mm_id->pid = clone(userspace_tramp, (void *) sp, CLONE_VFORK | CLONE_VM | SIGCHLD, - (void *)stub_stack); - if (pid < 0) { + (void *)&tramp_data); + if (mm_id->pid < 0) { err = -errno; printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", __func__, errno); - return err; + goto out_close; } - do { - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); - if (n < 0) { + if (using_seccomp) { + wait_stub_done_seccomp(mm_id, 1, 1); + } else { + do { + CATCH_EINTR(n = waitpid(mm_id->pid, &status, + WUNTRACED | __WALL)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); + + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + err = -EINVAL; + printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", + __func__, status); + goto out_kill; + } + + if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL, + (void *) PTRACE_O_TRACESYSGOOD) < 0) { err = -errno; - printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", + printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", __func__, errno); goto out_kill; } - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); - - if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { - err = -EINVAL; - printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", - __func__, status); - goto out_kill; - } - - if (ptrace(PTRACE_SETOPTIONS, pid, NULL, - (void *) PTRACE_O_TRACESYSGOOD) < 0) { - err = -errno; - printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", - __func__, errno); - goto out_kill; } if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { @@ -391,10 +529,22 @@ int start_userspace(unsigned long stub_stack) goto out_kill; } - return pid; + close(tramp_data.sockpair[0]); + if (using_seccomp) + mm_id->sock = tramp_data.sockpair[1]; + else + close(tramp_data.sockpair[1]); + + return 0; + +out_kill: + os_kill_ptraced_process(mm_id->pid, 1); +out_close: + close(tramp_data.sockpair[0]); + close(tramp_data.sockpair[1]); + + mm_id->pid = -1; - out_kill: - os_kill_ptraced_process(pid, 1); return err; } @@ -404,7 +554,9 @@ extern unsigned long tt_extra_sched_jiffies; void userspace(struct uml_pt_regs *regs) { int err, status, op, pid = userspace_pid[0]; - siginfo_t si; + siginfo_t si_ptrace; + siginfo_t *si; + int sig; /* Handle any immediate reschedules or signals */ interrupt_end(); @@ -437,103 +589,177 @@ void userspace(struct uml_pt_regs *regs) current_mm_sync(); - /* Flush out any pending syscalls */ - err = syscall_stub_flush(current_mm_id()); - if (err) { - if (err == -ENOMEM) - report_enomem(); + if (using_seccomp) { + struct mm_id *mm_id = current_mm_id(); + struct stub_data *proc_data = (void *) mm_id->stack; + int ret; - printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", - __func__, -err); - fatal_sigsegv(); - } + ret = set_stub_state(regs, proc_data, singlestepping()); + if (ret) { + printk(UM_KERN_ERR "%s - failed to set regs: %d", + __func__, ret); + fatal_sigsegv(); + } - /* - * This can legitimately fail if the process loads a - * bogus value into a segment register. It will - * segfault and PTRACE_GETREGS will read that value - * out of the process. However, PTRACE_SETREGS will - * fail. In this case, there is nothing to do but - * just kill the process. - */ - if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { - printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + /* Must have been reset by the syscall caller */ + if (proc_data->restart_wait != 0) + panic("Programming error: Flag to only run syscalls in child was not cleared!"); - if (put_fp_registers(pid, regs->fp)) { - printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + /* Mark pending syscalls for flushing */ + proc_data->syscall_data_len = mm_id->syscall_data_len; - if (singlestepping()) - op = PTRACE_SYSEMU_SINGLESTEP; - else - op = PTRACE_SYSEMU; + wait_stub_done_seccomp(mm_id, 0, 0); - if (ptrace(op, pid, 0, 0)) { - printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", - __func__, op, errno); - fatal_sigsegv(); - } + sig = proc_data->signal; - CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); - if (err < 0) { - printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + if (sig == SIGTRAP && proc_data->err != 0) { + printk(UM_KERN_ERR "%s - Error flushing stub syscalls", + __func__); + syscall_stub_dump_error(mm_id); + mm_id->syscall_data_len = proc_data->err; + fatal_sigsegv(); + } - regs->is_user = 1; - if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { - printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + mm_id->syscall_data_len = 0; + mm_id->syscall_fd_num = 0; - if (get_fp_registers(pid, regs->fp)) { - printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + ret = get_stub_state(regs, proc_data, NULL); + if (ret) { + printk(UM_KERN_ERR "%s - failed to get regs: %d", + __func__, ret); + fatal_sigsegv(); + } - UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) + panic("%s - Invalid siginfo offset from child", + __func__); + si = (void *)&proc_data->sigstack[proc_data->si_offset]; + + regs->is_user = 1; + + /* Fill in ORIG_RAX and extract fault information */ + PT_SYSCALL_NR(regs->gp) = si->si_syscall; + if (sig == SIGSEGV) { + mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset]; - if (WIFSTOPPED(status)) { - int sig = WSTOPSIG(status); + GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext); + } + } else { + /* Flush out any pending syscalls */ + err = syscall_stub_flush(current_mm_id()); + if (err) { + if (err == -ENOMEM) + report_enomem(); + + printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", + __func__, -err); + fatal_sigsegv(); + } - /* These signal handlers need the si argument. - * The SIGIO and SIGALARM handlers which constitute the - * majority of invocations, do not use it. + /* + * This can legitimately fail if the process loads a + * bogus value into a segment register. It will + * segfault and PTRACE_GETREGS will read that value + * out of the process. However, PTRACE_SETREGS will + * fail. In this case, there is nothing to do but + * just kill the process. */ - switch (sig) { - case SIGSEGV: - case SIGTRAP: - case SIGILL: - case SIGBUS: - case SIGFPE: - case SIGWINCH: - ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); - break; + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); } - switch (sig) { - case SIGSEGV: - if (PTRACE_FULL_FAULTINFO) { + if (put_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (singlestepping()) + op = PTRACE_SYSEMU_SINGLESTEP; + else + op = PTRACE_SYSEMU; + + if (ptrace(op, pid, 0, 0)) { + printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", + __func__, op, errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if (err < 0) { + printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + regs->is_user = 1; + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (get_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (WIFSTOPPED(status)) { + sig = WSTOPSIG(status); + + /* + * These signal handlers need the si argument + * and SIGSEGV needs the faultinfo. + * The SIGIO and SIGALARM handlers which constitute + * the majority of invocations, do not use it. + */ + switch (sig) { + case SIGSEGV: get_skas_faultinfo(pid, ®s->faultinfo); - (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, - regs, NULL); + fallthrough; + case SIGTRAP: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + ptrace(PTRACE_GETSIGINFO, pid, 0, + (struct siginfo *)&si_ptrace); + si = &si_ptrace; + break; + default: + si = NULL; + break; } - else handle_segv(pid, regs); + } else { + sig = 0; + } + } + + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + + if (sig) { + switch (sig) { + case SIGSEGV: + if (using_seccomp || PTRACE_FULL_FAULTINFO) + (*sig_info[SIGSEGV])(SIGSEGV, + (struct siginfo *)si, + regs, NULL); + else + segv(regs->faultinfo, 0, 1, NULL, NULL); + + break; + case SIGSYS: + handle_syscall(regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs); break; case SIGTRAP: - relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL); + relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL); break; case SIGALRM: break; @@ -543,7 +769,7 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: block_signals_trace(); - (*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL); + (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL); unblock_signals_trace(); break; default: diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 93fc82c01aba..a827c2e01aa5 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ @@ -24,6 +25,13 @@ #include <kern_util.h> #include <mem_user.h> #include <ptrace_user.h> +#include <stdbool.h> +#include <stub-data.h> +#include <sys/prctl.h> +#include <linux/seccomp.h> +#include <linux/filter.h> +#include <sysdep/mcontext.h> +#include <sysdep/stub.h> #include <registers.h> #include <skas.h> #include "internal.h" @@ -224,6 +232,140 @@ static void __init check_ptrace(void) check_sysemu(); } +extern unsigned long host_fp_size; +extern unsigned long exec_regs[MAX_REG_NR]; +extern unsigned long *exec_fp_regs; + +__initdata static struct stub_data *seccomp_test_stub_data; + +static void __init sigsys_handler(int sig, siginfo_t *info, void *p) +{ + ucontext_t *uc = p; + + /* Stow away the location of the mcontext in the stack */ + seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext - + (unsigned long)&seccomp_test_stub_data->sigstack[0]; + + /* Prevent libc from clearing memory (mctx_offset in particular) */ + syscall(__NR_exit, 0); +} + +static int __init seccomp_helper(void *data) +{ + static struct sock_filter filter[] = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0), + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), + }; + static struct sock_fprog prog = { + .len = ARRAY_SIZE(filter), + .filter = filter, + }; + struct sigaction sa; + + /* close_range is needed for the stub */ + if (stub_syscall3(__NR_close_range, 1, ~0U, 0)) + exit(1); + + set_sigstack(seccomp_test_stub_data->sigstack, + sizeof(seccomp_test_stub_data->sigstack)); + + sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; + sa.sa_sigaction = (void *) sigsys_handler; + sa.sa_restorer = NULL; + if (sigaction(SIGSYS, &sa, NULL) < 0) + exit(2); + + prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0) + exit(3); + + sleep(0); + + /* Never reached. */ + _exit(4); +} + +static bool __init init_seccomp(void) +{ + int pid; + int status; + int n; + unsigned long sp; + + /* + * We check that we can install a seccomp filter and then exit(0) + * from a trapped syscall. + * + * Note that we cannot verify that no seccomp filter already exists + * for a syscall that results in the process/thread to be killed. + */ + + os_info("Checking that seccomp filters can be installed..."); + + seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON, 0, 0); + + /* Use the syscall data area as stack, we just need something */ + sp = (unsigned long)&seccomp_test_stub_data->syscall_data + + sizeof(seccomp_test_stub_data->syscall_data) - + sizeof(void *); + pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL); + + if (pid < 0) + fatal_perror("check_seccomp : clone failed"); + + CATCH_EINTR(n = waitpid(pid, &status, __WCLONE)); + if (n < 0) + fatal_perror("check_seccomp : waitpid failed"); + + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + struct uml_pt_regs *regs; + unsigned long fp_size; + int r; + + /* Fill in the host_fp_size from the mcontext. */ + regs = calloc(1, sizeof(struct uml_pt_regs)); + get_stub_state(regs, seccomp_test_stub_data, &fp_size); + host_fp_size = fp_size; + free(regs); + + /* Repeat with the correct size */ + regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size); + r = get_stub_state(regs, seccomp_test_stub_data, NULL); + + /* Store as the default startup registers */ + exec_fp_regs = malloc(host_fp_size); + memcpy(exec_regs, regs->gp, sizeof(exec_regs)); + memcpy(exec_fp_regs, regs->fp, host_fp_size); + + munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data)); + + free(regs); + + if (r) { + os_info("failed to fetch registers: %d\n", r); + return false; + } + + os_info("OK\n"); + return true; + } + + if (WIFEXITED(status) && WEXITSTATUS(status) == 2) + os_info("missing\n"); + else + os_info("error\n"); + + munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data)); + return false; +} + + static void __init check_coredump_limit(void) { struct rlimit lim; @@ -278,6 +420,44 @@ void __init get_host_cpu_features( } } +static int seccomp_config __initdata; + +static int __init uml_seccomp_config(char *line, int *add) +{ + *add = 0; + + if (strcmp(line, "off") == 0) + seccomp_config = 0; + else if (strcmp(line, "auto") == 0) + seccomp_config = 1; + else if (strcmp(line, "on") == 0) + seccomp_config = 2; + else + fatal("Invalid seccomp option '%s', expected on/auto/off\n", + line); + + return 0; +} + +__uml_setup("seccomp=", uml_seccomp_config, +"seccomp=<on/auto/off>\n" +" Configure whether or not SECCOMP is used. With SECCOMP, userspace\n" +" processes work collaboratively with the kernel instead of being\n" +" traced using ptrace. All syscalls from the application are caught and\n" +" redirected using a signal. This signal handler in turn is permitted to\n" +" do the selected set of syscalls to communicate with the UML kernel and\n" +" do the required memory management.\n" +"\n" +" This method is overall faster than the ptrace based userspace, primarily\n" +" because it reduces the number of context switches for (minor) page faults.\n" +"\n" +" However, the SECCOMP filter is not (yet) restrictive enough to prevent\n" +" userspace from reading and writing all physical memory. Userspace\n" +" processes could also trick the stub into disabling SIGALRM which\n" +" prevents it from being interrupted for scheduling purposes.\n" +"\n" +" This is insecure and should only be used with a trusted userspace\n\n" +); void __init os_early_checks(void) { @@ -286,13 +466,24 @@ void __init os_early_checks(void) /* Print out the core dump limits early */ check_coredump_limit(); - check_ptrace(); - /* Need to check this early because mmapping happens before the * kernel is running. */ check_tmpexec(); + if (seccomp_config) { + if (init_seccomp()) { + using_seccomp = 1; + return; + } + + if (seccomp_config == 2) + fatal("SECCOMP userspace requested but not functional!\n"); + } + + using_seccomp = 0; + check_ptrace(); + pid = start_ptraced_child(); if (init_pid_registers(pid)) fatal("Failed to initialize default registers"); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 340e5468980e..8bed9030ad47 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -89,7 +89,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE - select ARCH_HAS_EXECMEM_ROX if X86_64 + select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -147,7 +147,7 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_NO_INSTR select ARCH_WANT_GENERAL_HUGETLB - select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGE_PMD_SHARE if X86_64 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 @@ -2695,6 +2695,15 @@ config MITIGATION_ITS disabled, mitigation cannot be enabled via cmdline. See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst> +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause a leakage. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile index db3255b979bd..342d79f0ab6a 100644 --- a/arch/x86/coco/sev/Makefile +++ b/arch/x86/coco/sev/Makefile @@ -5,5 +5,6 @@ obj-y += core.o sev-nmi.o vc-handle.o # Clang 14 and older may fail to respect __no_sanitize_undefined when inlining UBSAN_SANITIZE_sev-nmi.o := n -# GCC may fail to respect __no_sanitize_address when inlining +# GCC may fail to respect __no_sanitize_address or __no_kcsan when inlining KASAN_SANITIZE_sev-nmi.o := n +KCSAN_SANITIZE_sev-nmi.o := n diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b6db4e0b936b..7543a8b52c67 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = { */ static u64 snp_tsc_scale __ro_after_init; static u64 snp_tsc_offset __ro_after_init; -static u64 snp_tsc_freq_khz __ro_after_init; +static unsigned long snp_tsc_freq_khz __ro_after_init; DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); @@ -2167,15 +2167,31 @@ static unsigned long securetsc_get_tsc_khz(void) void __init snp_secure_tsc_init(void) { - unsigned long long tsc_freq_mhz; + struct snp_secrets_page *secrets; + unsigned long tsc_freq_mhz; + void *mem; if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return; + mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); + if (!mem) { + pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + secrets = (__force struct snp_secrets_page *)mem; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); - snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + + /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ + tsc_freq_mhz &= GENMASK_ULL(17, 0); + + snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor); x86_platform.calibrate_cpu = securetsc_get_tsc_khz; x86_platform.calibrate_tsc = securetsc_get_tsc_khz; + + early_memunmap(mem, PAGE_SIZE); } diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 175958b02f2b..8e9a0cc20a4a 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -36,20 +36,20 @@ EXPORT_SYMBOL_GPL(write_ibpb); /* * Define the VERW operand that is disguised as entry code so that - * it can be referenced with KPTI enabled. This ensure VERW can be + * it can be referenced with KPTI enabled. This ensures VERW can be * used late in exit-to-user path after page tables are switched. */ .pushsection .entry.text, "ax" .align L1_CACHE_BYTES, 0xcc -SYM_CODE_START_NOALIGN(mds_verw_sel) +SYM_CODE_START_NOALIGN(x86_verw_sel) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc -SYM_CODE_END(mds_verw_sel); +SYM_CODE_END(x86_verw_sel); /* For KVM */ -EXPORT_SYMBOL_GPL(mds_verw_sel); +EXPORT_SYMBOL_GPL(x86_verw_sel); .popsection diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 466283326630..c2fb729c270e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2826,7 +2826,7 @@ static void intel_pmu_read_event(struct perf_event *event) * If the PEBS counters snapshotting is enabled, * the topdown event is available in PEBS records. */ - if (is_topdown_event(event) && !is_pebs_counter_event_group(event)) + if (is_topdown_count(event) && !is_pebs_counter_event_group(event)) static_call(intel_pmu_update_topdown_event)(event, NULL); else intel_pmu_drain_pebs_buffer(); @@ -2900,6 +2900,7 @@ static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int msr_b, msr_c; + int msr_offset; if (!mask && !cpuc->acr_cfg_b[idx]) return; @@ -2907,19 +2908,20 @@ static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) if (idx < INTEL_PMC_IDX_FIXED) { msr_b = MSR_IA32_PMC_V6_GP0_CFG_B; msr_c = MSR_IA32_PMC_V6_GP0_CFG_C; + msr_offset = x86_pmu.addr_offset(idx, false); } else { msr_b = MSR_IA32_PMC_V6_FX0_CFG_B; msr_c = MSR_IA32_PMC_V6_FX0_CFG_C; - idx -= INTEL_PMC_IDX_FIXED; + msr_offset = x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false); } if (cpuc->acr_cfg_b[idx] != mask) { - wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask); + wrmsrl(msr_b + msr_offset, mask); cpuc->acr_cfg_b[idx] = mask; } /* Only need to update the reload value when there is a valid config value. */ if (mask && cpuc->acr_cfg_c[idx] != reload) { - wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload); + wrmsrl(msr_c + msr_offset, reload); cpuc->acr_cfg_c[idx] = reload; } } diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 3d1d3547095a..afdbda2dd7b7 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -34,6 +34,7 @@ #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> #include <linux/highmem.h> +#include <linux/export.h> void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 31f0d29cbc5e..090f5ac9f492 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -10,6 +10,7 @@ #include <linux/pci.h> #include <linux/irq.h> +#include <linux/export.h> #include <asm/mshyperv.h> static int hv_map_interrupt(union hv_device_id device_id, bool level, @@ -46,7 +47,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, if (nr_bank < 0) { local_irq_restore(flags); pr_err("%s: unable to generate VP set\n", __func__); - return EINVAL; + return -EINVAL; } intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; @@ -66,7 +67,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, if (!hv_result_success(status)) hv_status_err(status, "\n"); - return hv_result(status); + return hv_result_to_errno(status); } static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) @@ -88,7 +89,10 @@ static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); local_irq_restore(flags); - return hv_result(status); + if (!hv_result_success(status)) + hv_status_err(status, "\n"); + + return hv_result_to_errno(status); } #ifdef CONFIG_PCI_MSI @@ -169,13 +173,34 @@ static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) return dev_id; } -static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector, - struct hv_interrupt_entry *entry) +/** + * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor. + * @data: Describes the IRQ + * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL) + * + * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall. + * + * Return: 0 on success, -errno on failure + */ +int hv_map_msi_interrupt(struct irq_data *data, + struct hv_interrupt_entry *out_entry) { - union hv_device_id device_id = hv_build_pci_dev_id(dev); + struct irq_cfg *cfg = irqd_cfg(data); + struct hv_interrupt_entry dummy; + union hv_device_id device_id; + struct msi_desc *msidesc; + struct pci_dev *dev; + int cpu; - return hv_map_interrupt(device_id, false, cpu, vector, entry); + msidesc = irq_data_get_msi_desc(data); + dev = msi_desc_to_pci_dev(msidesc); + device_id = hv_build_pci_dev_id(dev); + cpu = cpumask_first(irq_data_get_effective_affinity_mask(data)); + + return hv_map_interrupt(device_id, false, cpu, cfg->vector, + out_entry ? out_entry : &dummy); } +EXPORT_SYMBOL_GPL(hv_map_msi_interrupt); static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) { @@ -188,13 +213,11 @@ static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { + struct hv_interrupt_entry *stored_entry; + struct irq_cfg *cfg = irqd_cfg(data); struct msi_desc *msidesc; struct pci_dev *dev; - struct hv_interrupt_entry out_entry, *stored_entry; - struct irq_cfg *cfg = irqd_cfg(data); - const cpumask_t *affinity; - int cpu; - u64 status; + int ret; msidesc = irq_data_get_msi_desc(data); dev = msi_desc_to_pci_dev(msidesc); @@ -204,9 +227,6 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - affinity = irq_data_get_effective_affinity_mask(data); - cpu = cpumask_first_and(affinity, cpu_online_mask); - if (data->chip_data) { /* * This interrupt is already mapped. Let's unmap first. @@ -219,14 +239,12 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) stored_entry = data->chip_data; data->chip_data = NULL; - status = hv_unmap_msi_interrupt(dev, stored_entry); + ret = hv_unmap_msi_interrupt(dev, stored_entry); kfree(stored_entry); - if (status != HV_STATUS_SUCCESS) { - hv_status_debug(status, "failed to unmap\n"); + if (ret) return; - } } stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); @@ -235,15 +253,14 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry); - if (status != HV_STATUS_SUCCESS) { + ret = hv_map_msi_interrupt(data, stored_entry); + if (ret) { kfree(stored_entry); return; } - *stored_entry = out_entry; data->chip_data = stored_entry; - entry_to_msi_msg(&out_entry, msg); + entry_to_msi_msg(data->chip_data, msg); return; } @@ -257,7 +274,6 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) { struct hv_interrupt_entry old_entry; struct msi_msg msg; - u64 status; if (!irqd->chip_data) { pr_debug("%s: no chip data\n!", __func__); @@ -270,10 +286,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) kfree(irqd->chip_data); irqd->chip_data = NULL; - status = hv_unmap_msi_interrupt(dev, &old_entry); - - if (status != HV_STATUS_SUCCESS) - hv_status_err(status, "\n"); + (void)hv_unmap_msi_interrupt(dev, &old_entry); } static void hv_msi_free_irq(struct irq_domain *domain, diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index e93a2f488ff7..ade6c665c97e 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/export.h> #include <asm/svm.h> #include <asm/sev.h> #include <asm/io.h> diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c index 1083dc8646f9..8ccbb7c4fc27 100644 --- a/arch/x86/hyperv/nested.c +++ b/arch/x86/hyperv/nested.c @@ -11,6 +11,7 @@ #include <linux/types.h> +#include <linux/export.h> #include <hyperv/hvhdk.h> #include <asm/mshyperv.h> #include <asm/tlbflush.h> diff --git a/arch/x86/include/asm/amd/fch.h b/arch/x86/include/asm/amd/fch.h deleted file mode 100644 index 2cf5153edbc2..000000000000 --- a/arch/x86/include/asm/amd/fch.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_AMD_FCH_H_ -#define _ASM_X86_AMD_FCH_H_ - -#define FCH_PM_BASE 0xFED80300 - -/* Register offsets from PM base: */ -#define FCH_PM_DECODEEN 0x00 -#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19) -#define FCH_PM_SCRATCH 0x80 -#define FCH_PM_S5_RESET_STATUS 0xC0 - -#endif /* _ASM_X86_AMD_FCH_H_ */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ee176236c2be..286d509f9363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -456,6 +456,7 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ @@ -487,6 +488,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +546,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 363110e6b2e3..a2c1f2d24b64 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -9,6 +9,14 @@ #include <asm/cpufeature.h> #include <asm/msr.h> +/* + * Define bits that are always set to 1 in DR7, only bit 10 is + * architecturally reserved to '1'. + * + * This is also the init/reset value for DR7. + */ +#define DR7_FIXED_1 0x00000400 + DECLARE_PER_CPU(unsigned long, cpu_dr7); #ifndef CONFIG_PARAVIRT_XXL @@ -100,8 +108,8 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value) static inline void hw_breakpoint_disable(void) { - /* Zero the control register for HW Breakpoint */ - set_debugreg(0UL, 7); + /* Reset the control register for HW Breakpoint */ + set_debugreg(DR7_FIXED_1, 7); /* Zero-out the individual HW breakpoint address registers */ set_debugreg(0UL, 0); @@ -125,9 +133,12 @@ static __always_inline unsigned long local_db_save(void) return 0; get_debugreg(dr7, 7); - dr7 &= ~0x400; /* architecturally set bit */ + + /* Architecturally set bit */ + dr7 &= ~DR7_FIXED_1; if (dr7) - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); + /* * Ensure the compiler doesn't lower the above statements into * the critical section; disabling breakpoints late would not diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 9a9b21b78905..b30e5474c18e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void) static __always_inline void native_safe_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static __always_inline void native_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4a391929cdb..f7af967aa16f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -31,6 +31,7 @@ #include <asm/apic.h> #include <asm/pvclock-abi.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/msr-index.h> @@ -249,7 +250,6 @@ enum x86_intercept_stage; #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) -#define DR7_FIXED_1 0x00000400 #define DR7_VOLATILE 0xffff2bff #define KVM_GUESTDBG_VALID_MASK \ @@ -700,8 +700,13 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; - /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). + */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct hv_vp_assist_page vp_assist_page; @@ -764,6 +769,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, + CPUID_8000_0021_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index e988bac0a4a1..3c2de4ce3b10 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -5,12 +5,20 @@ #include <asm-generic/module.h> #include <asm/orc_types.h> +struct its_array { +#ifdef CONFIG_MITIGATION_ITS + void **pages; + int num; +#endif +}; + struct mod_arch_specific { #ifdef CONFIG_UNWINDER_ORC unsigned int num_orcs; int *orc_unwind_ip; struct orc_entry *orc_unwind; #endif + struct its_array its_pages; }; #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index e1752ba47e67..abc4659f5809 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -112,12 +112,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_status; } -/* Hypercall to the L0 hypervisor */ -static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output) -{ - return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output); -} - /* Fast hypercall with 8 bytes of input and no output */ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) { @@ -165,13 +159,6 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) return _hv_do_fast_hypercall8(control, input1); } -static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1) -{ - u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; - - return _hv_do_fast_hypercall8(control, input1); -} - /* Fast hypercall with 16 bytes of input */ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) { @@ -223,13 +210,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) return _hv_do_fast_hypercall16(control, input1, input2); } -static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2) -{ - u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; - - return _hv_do_fast_hypercall16(control, input1, input2); -} - extern struct hv_vp_assist_page **hv_vp_assist_page; static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) @@ -262,6 +242,8 @@ static inline void hv_apic_init(void) {} struct irq_domain *hv_create_pci_msi_domain(void); +int hv_map_msi_interrupt(struct irq_data *data, + struct hv_interrupt_entry *out_entry); int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, struct hv_interrupt_entry *entry); int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b7dded3c8113..5cfb5d74dd5f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -628,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index dd2b129b0418..6ca6516c7492 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,8 +43,6 @@ static __always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) static __always_inline void __mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); - /* * Use the instruction mnemonic with implicit operands, as the LLVM * assembler fails to assemble the mnemonic with explicit operands: @@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx) */ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx %eax, %ebx, %ecx" */ asm volatile(".byte 0x0f, 0x01, 0xfb" @@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) */ static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } @@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx) */ static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - if (ecx & 1) { - __mwait(eax, ecx); - } else { - __sti_mwait(eax, ecx); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + if (ecx & 1) { + __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); } } + +out: current_clr_polling(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 20d754b98f3f..10f261678749 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -302,25 +302,31 @@ .endm /* - * Macro to execute VERW instruction that mitigate transient data sampling - * attacks such as MDS. On affected systems a microcode update overloaded VERW - * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. - * + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ -.macro CLEAR_CPU_BUFFERS +.macro __CLEAR_CPU_BUFFERS feature #ifdef CONFIG_X86_64 - ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). */ - ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature #endif .endm +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP @@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear); -extern u16 mds_verw_sel; +extern u16 x86_verw_sel; #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static __always_inline void mds_clear_cpu_buffers(void) +static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ -static __always_inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void x86_idle_clear_cpu_buffers(void) { - if (static_branch_likely(&mds_idle_clear)) - mds_clear_cpu_buffers(); + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); } #endif /* __ASSEMBLER__ */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 774430c3abff..97954c936c54 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1561,7 +1561,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 58e028d42e41..a631f7d7c0c0 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -223,6 +223,18 @@ struct snp_tsc_info_resp { u8 rsvd2[100]; } __packed; +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + struct snp_guest_req { void *req_buf; size_t req_sz; @@ -282,8 +294,11 @@ struct snp_secrets_page { u8 svsm_guest_vmpl; u8 rsvd3[3]; + /* The percentage decrease from nominal to mean TSC frequency. */ + u32 tsc_factor; + /* Remainder of page */ - u8 rsvd4[3744]; + u8 rsvd4[3740]; } __packed; struct snp_msg_desc { diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 2f3820342598..8bc074c8d7c6 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -72,6 +72,7 @@ #define TDVMCALL_MAP_GPA 0x10001 #define TDVMCALL_GET_QUOTE 0x10002 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL /* * TDG.VP.VMCALL Status Codes (returned in R10) @@ -80,6 +81,7 @@ #define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL #define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL #define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL +#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL /* * Bitmasks of exposed registers (with VMM). diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index e770c4fc47f4..8727c7e21dd1 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +/* + * To prevent immediate repeat of single step trap on return from SIGTRAP + * handler if the trap flag (TF) is set without an external debugger attached, + * clear the software event flag in the augmented SS, ensuring no single-step + * trap is pending upon ERETU completion. + * + * Note, this function should be called in sigreturn() before the original + * state is restored to make sure the TF is read from the entry frame. + */ +static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs) +{ + /* + * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction + * is being single-stepped, do not clear the software event flag in the + * augmented SS, thus a debugger won't skip over the following instruction. + */ +#ifdef CONFIG_X86_FRED + if (!(regs->flags & X86_EFLAGS_TF)) + regs->fred_ss.swevent = 0; +#endif +} + #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 8b19294600c4..7ddef3a69866 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -106,7 +106,7 @@ void tdx_init(void); typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); -static inline u64 sc_retry(sc_func_t func, u64 fn, +static __always_inline u64 sc_retry(sc_func_t func, u64 fn, struct tdx_module_args *args) { int retry = RDRAND_RETRY_LOOPS; diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 0454d5e60e5d..721b408d9a67 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -44,16 +44,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_after_save, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - -DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) @@ -64,11 +54,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_init_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_dropped, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index 0007ba077c0c..41da492dfb01 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -15,7 +15,26 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ -/* Define reserved bits in DR6 which are always set to 1 */ +/* + * Define bits in DR6 which are set to 1 by default. + * + * This is also the DR6 architectural value following Power-up, Reset or INIT. + * + * Note, with the introduction of Bus Lock Detection (BLD) and Restricted + * Transactional Memory (RTM), the DR6 register has been modified: + * + * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports + * Bus Lock Detection. The assertion of a bus lock could clear it. + * + * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports + * restricted transactional memory. #DB occurred inside an RTM region + * could clear it. + * + * Apparently, DR6.BLD and DR6.RTM are active low bits. + * + * As a result, DR6_RESERVED is an incorrect name now, but it is kept for + * compatibility. + */ #define DR6_RESERVED (0xFFFF0FF0) #define DR_TRAP0 (0x1) /* db0 */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 99a783fd4691..0d2a6d953be9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the linux kernel. # -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ecfe7b497cad..ea1d984166cd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -116,6 +116,24 @@ static struct module *its_mod; #endif static void *its_page; static unsigned int its_offset; +struct its_array its_pages; + +static void *__its_alloc(struct its_array *pages) +{ + void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + if (!page) + return NULL; + + void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + + pages->pages = tmp; + pages->pages[pages->num++] = page; + + return no_free_ptr(page); +} /* Initialize a thunk with the "jmp *reg; int3" instructions. */ static void *its_init_thunk(void *thunk, int reg) @@ -151,6 +169,21 @@ static void *its_init_thunk(void *thunk, int reg) return thunk + offset; } +static void its_pages_protect(struct its_array *pages) +{ + for (int i = 0; i < pages->num; i++) { + void *page = pages->pages[i]; + execmem_restore_rox(page, PAGE_SIZE); + } +} + +static void its_fini_core(void) +{ + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + its_pages_protect(&its_pages); + kfree(its_pages.pages); +} + #ifdef CONFIG_MODULES void its_init_mod(struct module *mod) { @@ -173,10 +206,8 @@ void its_fini_mod(struct module *mod) its_page = NULL; mutex_unlock(&text_mutex); - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; - execmem_restore_rox(page, PAGE_SIZE); - } + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + its_pages_protect(&mod->arch.its_pages); } void its_free_mod(struct module *mod) @@ -184,37 +215,33 @@ void its_free_mod(struct module *mod) if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) return; - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; + for (int i = 0; i < mod->arch.its_pages.num; i++) { + void *page = mod->arch.its_pages.pages[i]; execmem_free(page); } - kfree(mod->its_page_array); + kfree(mod->arch.its_pages.pages); } #endif /* CONFIG_MODULES */ static void *its_alloc(void) { - void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); - - if (!page) - return NULL; + struct its_array *pages = &its_pages; + void *page; #ifdef CONFIG_MODULES - if (its_mod) { - void *tmp = krealloc(its_mod->its_page_array, - (its_mod->its_num_pages+1) * sizeof(void *), - GFP_KERNEL); - if (!tmp) - return NULL; + if (its_mod) + pages = &its_mod->arch.its_pages; +#endif - its_mod->its_page_array = tmp; - its_mod->its_page_array[its_mod->its_num_pages++] = page; + page = __its_alloc(pages); + if (!page) + return NULL; - execmem_make_temp_rw(page, PAGE_SIZE); - } -#endif /* CONFIG_MODULES */ + execmem_make_temp_rw(page, PAGE_SIZE); + if (pages == &its_pages) + set_memory_x((unsigned long)page, 1); - return no_free_ptr(page); + return page; } static void *its_allocate_thunk(int reg) @@ -268,7 +295,9 @@ u8 *its_static_thunk(int reg) return thunk; } -#endif +#else +static inline void its_fini_core(void) {} +#endif /* CONFIG_MITIGATION_ITS */ /* * Nomenclature for variable names to simplify and clarify this code and ease @@ -2338,6 +2367,8 @@ void __init alternative_instructions(void) apply_retpolines(__retpoline_sites, __retpoline_sites_end); apply_returns(__return_sites, __return_sites_end); + its_fini_core(); + /* * Adjust all CALL instructions to point to func()-10, including * those in .altinstr_replacement. @@ -3107,6 +3138,6 @@ void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, c */ void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate) { - __smp_text_poke_batch_add(addr, opcode, len, emulate); + smp_text_poke_batch_add(addr, opcode, len, emulate); smp_text_poke_batch_finish(); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 93da466dfe2c..329ee185d8cc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include <linux/sched/clock.h> #include <linux/random.h> #include <linux/topology.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cacheinfo.h> @@ -31,7 +31,7 @@ #include "cpu.h" -u16 invlpgb_count_max __ro_after_init; +u16 invlpgb_count_max __ro_after_init = 1; static inline int rdmsrq_amd_safe(unsigned msr, u64 *p) { @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + + tsa_init(c); + return; warn: @@ -930,6 +974,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) init_spectral_chicken(c); fix_erratum_1386(c); zen2_zenbleed_check(c); + + /* Disable RDSEED on AMD Cyan Skillfish because of an error. */ + if (c->x86_model == 0x47 && c->x86_stepping == 0x0) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg("RDSEED is not reliable on this platform; disabling.\n"); + } + + /* Correct misconfigured CPUID on some clients. */ + clear_cpu_cap(c, X86_FEATURE_INVLPGB); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f94e6a5497d..f4d3abb12317 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -169,9 +171,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* * Controls whether l1d flush based mitigations are enabled, @@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -637,7 +641,7 @@ static void __init mmio_apply_mitigation(void) * is required irrespective of SMT state. */ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); if (mmio_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -1488,6 +1492,94 @@ static void __init its_apply_mitigation(void) } #undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) { + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + goto out; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) + tsa_mitigation = TSA_MITIGATION_FULL; + + /* + * No need to set verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } +} + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = @@ -2249,10 +2341,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8feb8fd2957a..fb50c1dd53ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -2243,20 +2255,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); #endif #endif -/* - * Clear all 6 debug registers: - */ -static void clear_all_debug_regs(void) +static void initialize_debug_regs(void) { - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - - set_debugreg(0, i); - } + /* Control register first -- to make sure everything is disabled. */ + set_debugreg(DR7_FIXED_1, 7); + set_debugreg(DR6_RESERVED, 6); + /* dr5 and dr4 don't exist */ + set_debugreg(0, 3); + set_debugreg(0, 2); + set_debugreg(0, 1); + set_debugreg(0, 0); } #ifdef CONFIG_KGDB @@ -2417,7 +2425,7 @@ void cpu_init(void) load_mm_ldt(&init_mm); - clear_all_debug_regs(); + initialize_debug_regs(); dbg_restore_debug_regs(); doublefault_init_cpu_tss(); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9d852c3b2cb5..5c4eb28c3ac9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) struct thresh_restart { struct threshold_block *b; - int reset; int set_lvt_off; int lvt_off; u16 old_limit; @@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr) rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); + /* + * Reset error count and overflow bit. + * This is done during init or after handling an interrupt. + */ + if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) { + hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI); + hi |= THRESHOLD_MAX - tr->b->threshold_limit; } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); @@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol } bank_type = smca_get_bank_type(cpu, bank); - if (bank_type >= N_SMCA_BANK_TYPES) - return NULL; if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; - return NULL; + } + + if (b && b->block) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block); + return buf_mcatype; + } + + if (bank_type >= N_SMCA_BANK_TYPES) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank); + return buf_mcatype; } if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e9b3c5d4a52e..4da4eab56c81 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void) void (*mc_poll_banks)(void) = mc_poll_banks_default; +static bool should_enable_timer(unsigned long iv) +{ + return !mca_cfg.ignore_ce && iv; +} + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t) if (mce_get_storm_mode()) { __start_timer(t, HZ); - } else { + } else if (should_enable_timer(iv)) { __this_cpu_write(mce_next_interval, iv); __start_timer(t, iv); } @@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; - if (mca_cfg.ignore_ce || !iv) - return; - - this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (should_enable_timer(iv)) { + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } static void __mcheck_cpu_setup_timer(void) @@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { struct timer_list *t = this_cpu_ptr(&mce_timer); - int ret; mce_device_create(cpu); - - ret = mce_threshold_create_device(cpu); - if (ret) { - mce_device_remove(cpu); - return ret; - } + mce_threshold_create_device(cpu); mce_reenable_cpu(); mce_start_timer(t); return 0; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index efcf21e9552e..9b149b9c4109 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_clear(struct cpuinfo_x86 *c) { intel_clear_lmce(); + cmci_clear(); } bool intel_filter_mce(struct mce *m) diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c index 2a1655b1fdd8..1fd349cfc802 100644 --- a/arch/x86/kernel/cpu/microcode/amd_shas.c +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = { 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, } }, + { 0xa0011d7, { + 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b, + 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12, + 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2, + 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36, + } + }, { 0xa001223, { 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, @@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = { 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, } }, + { 0xa00123b, { + 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2, + 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3, + 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28, + 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72, + } + }, { 0xa00820c, { 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, @@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = { 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, } }, + { 0xa00820d, { + 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4, + 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca, + 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1, + 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7, + } + }, { 0xa10113e, { 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, @@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = { 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, } }, + { 0xa10114c, { + 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64, + 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74, + 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a, + 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0, + } + }, { 0xa10123e, { 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, @@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = { 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, } }, + { 0xa10124c, { + 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90, + 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46, + 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc, + 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2, + } + }, { 0xa108108, { 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, @@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = { 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, } }, + { 0xa108109, { + 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa, + 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b, + 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35, + 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59, + } + }, { 0xa20102d, { 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, @@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = { 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, } }, + { 0xa20102e, { + 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd, + 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6, + 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5, + 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe, + } + }, { 0xa201210, { 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, @@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = { 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, } }, + { 0xa201211, { + 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95, + 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27, + 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff, + 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27, + } + }, { 0xa404107, { 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, @@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = { 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, } }, + { 0xa404108, { + 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc, + 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb, + 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19, + 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00, + } + }, { 0xa500011, { 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, @@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = { 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, } }, + { 0xa500012, { + 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4, + 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16, + 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f, + 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63, + } + }, { 0xa601209, { 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, @@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = { 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, } }, + { 0xa60120a, { + 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d, + 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b, + 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d, + 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c, + } + }, { 0xa704107, { 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, @@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = { 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, } }, + { 0xa704108, { + 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93, + 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98, + 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49, + 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a, + } + }, { 0xa705206, { 0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, @@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = { 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, } }, + { 0xa705208, { + 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19, + 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2, + 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11, + 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff, + } + }, { 0xa708007, { 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, @@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = { 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, } }, + { 0xa708008, { + 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46, + 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab, + 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16, + 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b, + } + }, { 0xa70c005, { 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, @@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = { 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, } }, + { 0xa70c008, { + 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21, + 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e, + 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66, + 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0, + } + }, { 0xaa00116, { 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, @@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = { 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, } }, + { 0xaa00216, { + 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5, + 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08, + 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2, + 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1, + } + }, }; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7109cbfcad4f..187d527ef73b 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; struct rdt_mon_domain *d; + struct cacheinfo *ci; int err; lockdep_assert_held(&domain_list_lock); @@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); - if (!d->ci) { + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) { pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); mon_domain_free(hw_dom); return; } + d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); arch_mon_domain_online(r, d); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 6290dd120f5e..ff40f09ad911 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } -static void task_update_io_bitmap(struct task_struct *tsk) +static void task_update_io_bitmap(void) { + struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; if (t->iopl_emul == 3 || t->io_bitmap) { @@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *tsk) struct io_bitmap *iobm = tsk->thread.io_bitmap; tsk->thread.io_bitmap = NULL; - task_update_io_bitmap(tsk); + /* + * Don't touch the TSS when invoked on a failed fork(). TSS + * reflects the state of @current and not the state of @tsk. + */ + if (tsk == current) + task_update_io_bitmap(); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } @@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) } t->iopl_emul = level; - task_update_io_bitmap(current); - + task_update_io_bitmap(); return 0; } diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 102641fd2172..8b1a9733d13e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -385,7 +385,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) struct perf_event *bp; /* Disable hardware debugging while we are in kgdb: */ - set_debugreg(0UL, 7); + set_debugreg(DR7_FIXED_1, 7); for (i = 0; i < HBP_NUM; i++) { if (!breakinfo[i].enabled) continue; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c1d2dac72b9c..a838be04f874 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -176,6 +176,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->ret_addr = (unsigned long) ret_from_fork_asm; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; + clear_tsk_thread_flag(p, TIF_IO_BITMAP); p->thread.iopl_warn = 0; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -464,6 +465,11 @@ void native_tss_update_io_bitmap(void) } else { struct io_bitmap *iobm = t->io_bitmap; + if (WARN_ON_ONCE(!iobm)) { + clear_thread_flag(TIF_IO_BITMAP); + native_tss_invalidate_io_bitmap(); + } + /* * Only copy bitmap data when the sequence number differs. The * update time is accounted to the incoming task. @@ -901,16 +907,24 @@ static __init bool prefer_mwait_c1_over_halt(void) */ static __cpuidle void mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { const void *addr = ¤t_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - __sti_mwait(0, 0); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + __sti_mwait(0, 0); + raw_local_irq_disable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a10e180cbf23..3ef15c2f152f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. */ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400)) + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1)) return; printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8d6cf25127aa..b972bf72fb8b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -133,7 +133,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. */ if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400))) { + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))) { printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n", log_lvl, d0, d1, d2); printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n", diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 98123ff10506..42bbc42bd350 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn) struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; + prevent_single_step_upon_eretu(regs); + if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) @@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn) struct rt_sigframe_ia32 __user *frame; sigset_t set; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ee9453891901..d483b585c6c6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18266cc3d98c..b014e6d229f9 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -299,3 +299,27 @@ struct smp_ops smp_ops = { .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); + +int arch_cpu_rescan_dead_smt_siblings(void) +{ + enum cpuhp_smt_control old = cpu_smt_control; + int ret; + + /* + * If SMT has been disabled and SMT siblings are in HLT, bring them back + * online and offline them again so that they end up in MWAIT proper. + * + * Called with hotplug enabled. + */ + if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED) + return 0; + + ret = cpuhp_smt_enable(); + if (ret) + return ret; + + ret = cpuhp_smt_disable(old); + + return ret; +} +EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fc78c2325fd2..58ede3fa6a75 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1244,6 +1244,10 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1290,50 +1294,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } /* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead_cpuid_hint(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_LEAF_MWAIT; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } - - mwait_play_dead(eax); -} - -/* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). */ @@ -1383,9 +1343,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead_cpuid_hint(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c5c897a86418..36354b470590 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } -static __always_inline unsigned long debug_read_clear_dr6(void) +static __always_inline unsigned long debug_read_reset_dr6(void) { unsigned long dr6; + get_debugreg(dr6, 6); + dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ + /* * The Intel SDM says: * - * Certain debug exceptions may clear bits 0-3. The remaining - * contents of the DR6 register are never cleared by the - * processor. To avoid confusion in identifying debug - * exceptions, debug handlers should clear the register before - * returning to the interrupted task. + * Certain debug exceptions may clear bits 0-3 of DR6. + * + * BLD induced #DB clears DR6.BLD and any other debug + * exception doesn't modify DR6.BLD. * - * Keep it simple: clear DR6 immediately. + * RTM induced #DB clears DR6.RTM and any other debug + * exception sets DR6.RTM. + * + * To avoid confusion in identifying debug exceptions, + * debug handlers should set DR6.BLD and DR6.RTM, and + * clear other DR6 bits before returning. + * + * Keep it simple: write DR6 with its architectural reset + * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately. */ - get_debugreg(dr6, 6); set_debugreg(DR6_RESERVED, 6); - dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ return dr6; } @@ -1239,13 +1247,13 @@ out: /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { - exc_debug_kernel(regs, debug_read_clear_dr6()); + exc_debug_kernel(regs, debug_read_reset_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { - exc_debug_user(regs, debug_read_clear_dr6()); + exc_debug_user(regs, debug_read_reset_dr6()); } #ifdef CONFIG_X86_FRED @@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) { /* * FRED #DB stores DR6 on the stack in the format which - * debug_read_clear_dr6() returns for the IDT entry points. + * debug_read_reset_dr6() returns for the IDT entry points. */ unsigned long dr6 = fred_event_data(regs); @@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) /* 32 bit does not have separate entry points. */ DEFINE_IDTENTRY_RAW(exc_debug) { - unsigned long dr6 = debug_read_clear_dr6(); + unsigned long dr6 = debug_read_reset_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b2d006756e02..f84bc0569c9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1165,6 +1165,8 @@ void kvm_set_cpu_caps(void) */ SYNTHESIZED_F(LFENCE_RDTSC), /* SmmPgCfgLock */ + /* 4: Resv */ + SYNTHESIZED_F(VERW_CLEAR), F(NULL_SEL_CLR_BASE), /* UpperAddressIgnore */ F(AUTOIBRS), @@ -1179,6 +1181,11 @@ void kvm_set_cpu_caps(void) F(SRSO_USER_KERNEL_NO), ); + kvm_cpu_cap_init(CPUID_8000_0021_ECX, + SYNTHESIZED_F(TSA_SQ_NO), + SYNTHESIZED_F(TSA_L1_NO), + ); + kvm_cpu_cap_init(CPUID_8000_0022_EAX, F(PERFMON_V2), ); @@ -1748,8 +1755,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; case 0x80000021: - entry->ebx = entry->ecx = entry->edx = 0; + entry->ebx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); + cpuid_entry_override(entry, CPUID_8000_0021_ECX); break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 24f0318c50d7..ee27064dd72f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_invlpg_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. @@ -2001,11 +2004,11 @@ out_flush_all: static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + unsigned long *vcpu_mask = hv_vcpu->vcpu_mask; u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; /* * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index cbc84c6abc2e..4e06e2e89a8f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4896,12 +4896,16 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, { u64 error_code = PFERR_GUEST_FINAL_MASK; u8 level = PG_LEVEL_4K; + u64 direct_bits; u64 end; int r; if (!vcpu->kvm->arch.pre_fault_allowed) return -EOPNOTSUPP; + if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa))) + return -EINVAL; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. @@ -4910,15 +4914,18 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, if (r) return r; + direct_bits = 0; if (kvm_arch_has_private_mem(vcpu->kvm) && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) error_code |= PFERR_PRIVATE_ACCESS; + else + direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm)); /* * Shadow paging uses GVA for kvm page fault, so restrict to * two-dimensional paging. */ - r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level); + r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level); if (r < 0) return r; diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index fde0ae986003..c53b92379e6e 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,6 +52,10 @@ /* CPUID level 0x80000022 (EAX) */ #define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0) +/* CPUID level 0x80000021 (ECX) */ +#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1) +#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2) + struct cpuid_reg { u32 function; u32 index; @@ -82,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, + [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; /* @@ -121,6 +126,8 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); + KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 5a69b657dae9..b201f77fcd49 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; @@ -2871,6 +2875,33 @@ void __init sev_set_cpu_caps(void) } } +static bool is_sev_snp_initialized(void) +{ + struct sev_user_data_snp_status *status; + struct sev_data_snp_addr buf; + bool initialized = false; + int ret, error = 0; + + status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO); + if (!status) + return false; + + buf.address = __psp_pa(status); + ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error); + if (ret) { + pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n", + ret, error, error); + goto out; + } + + initialized = !!status->state; + +out: + snp_free_firmware_page(status); + + return initialized; +} + void __init sev_hardware_setup(void) { unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; @@ -2975,6 +3006,14 @@ void __init sev_hardware_setup(void) sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP); out: + if (sev_enabled) { + init_args.probe = true; + if (sev_platform_init(&init_args)) + sev_supported = sev_es_supported = sev_snp_supported = false; + else if (sev_snp_supported) + sev_snp_supported = is_sev_snp_initialized(); + } + if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : @@ -3001,15 +3040,6 @@ out: sev_supported_vmsa_features = 0; if (sev_es_debug_swap_enabled) sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; - - if (!sev_enabled) - return; - - /* - * Do both SNP and SEV initialization at KVM module load. - */ - init_args.probe = true; - sev_platform_init(&init_args); } void sev_hardware_unsetup(void) @@ -4419,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) * the VMSA will be NULL if this vCPU is the destination for intrahost * migration, and will be copied later. */ - if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa) - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (!svm->sev_es.snp_has_guest_vmsa) { + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + else + svm->vmcb->control.vmsa_pa = INVALID_PAGE; + } if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) svm->vmcb->control.allowed_sev_features = sev->vmsa_features | diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0c61153b275f..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: @@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax 2: diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index b952bc673271..ec79aacc446f 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -173,6 +173,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i tdx_clear_unsupported_cpuid(entry); } +#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1) + static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, struct kvm_tdx_capabilities *caps) { @@ -188,6 +190,9 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, caps->cpuid.nent = td_conf->num_cpuid_config; + caps->user_tdvmcallinfo_1_r11 = + TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT; + for (i = 0; i < td_conf->num_cpuid_config; i++) td_init_cpuid_entry2(&caps->cpuid.entries[i], i); @@ -1212,11 +1217,13 @@ static int tdx_map_gpa(struct kvm_vcpu *vcpu) /* * Converting TDVMCALL_MAP_GPA to KVM_HC_MAP_GPA_RANGE requires * userspace to enable KVM_CAP_EXIT_HYPERCALL with KVM_HC_MAP_GPA_RANGE - * bit set. If not, the error code is not defined in GHCI for TDX, use - * TDVMCALL_STATUS_INVALID_OPERAND for this case. + * bit set. This is a base call so it should always be supported, but + * KVM has no way to ensure that userspace implements the GHCI correctly. + * So if KVM_HC_MAP_GPA_RANGE does not cause a VMEXIT, return an error + * to the guest. */ if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { - ret = TDVMCALL_STATUS_INVALID_OPERAND; + ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; goto error; } @@ -1449,20 +1456,106 @@ error: return 1; } +static int tdx_complete_get_td_vm_call_info(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + + tdvmcall_set_return_code(vcpu, vcpu->run->tdx.get_tdvmcall_info.ret); + + /* + * For now, there is no TDVMCALL beyond GHCI base API supported by KVM + * directly without the support from userspace, just set the value + * returned from userspace. + */ + tdx->vp_enter_args.r11 = vcpu->run->tdx.get_tdvmcall_info.r11; + tdx->vp_enter_args.r12 = vcpu->run->tdx.get_tdvmcall_info.r12; + tdx->vp_enter_args.r13 = vcpu->run->tdx.get_tdvmcall_info.r13; + tdx->vp_enter_args.r14 = vcpu->run->tdx.get_tdvmcall_info.r14; + + return 1; +} + static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu) { struct vcpu_tdx *tdx = to_tdx(vcpu); - if (tdx->vp_enter_args.r12) - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); - else { + switch (tdx->vp_enter_args.r12) { + case 0: tdx->vp_enter_args.r11 = 0; + tdx->vp_enter_args.r12 = 0; tdx->vp_enter_args.r13 = 0; tdx->vp_enter_args.r14 = 0; + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS); + return 1; + case 1: + vcpu->run->tdx.get_tdvmcall_info.leaf = tdx->vp_enter_args.r12; + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_GET_TD_VM_CALL_INFO; + vcpu->run->tdx.get_tdvmcall_info.ret = TDVMCALL_STATUS_SUCCESS; + vcpu->run->tdx.get_tdvmcall_info.r11 = 0; + vcpu->run->tdx.get_tdvmcall_info.r12 = 0; + vcpu->run->tdx.get_tdvmcall_info.r13 = 0; + vcpu->run->tdx.get_tdvmcall_info.r14 = 0; + vcpu->arch.complete_userspace_io = tdx_complete_get_td_vm_call_info; + return 0; + default: + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; } +} + +static int tdx_complete_simple(struct kvm_vcpu *vcpu) +{ + tdvmcall_set_return_code(vcpu, vcpu->run->tdx.unknown.ret); return 1; } +static int tdx_get_quote(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 gpa = tdx->vp_enter_args.r12; + u64 size = tdx->vp_enter_args.r13; + + /* The gpa of buffer must have shared bit set. */ + if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_GET_QUOTE; + vcpu->run->tdx.get_quote.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.get_quote.gpa = gpa & ~gfn_to_gpa(kvm_gfn_direct_bits(tdx->vcpu.kvm)); + vcpu->run->tdx.get_quote.size = size; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + +static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 vector = tdx->vp_enter_args.r12; + + if (vector < 32 || vector > 255) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT; + vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.setup_event_notify.vector = vector; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1472,11 +1565,15 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_report_fatal_error(vcpu); case TDVMCALL_GET_TD_VM_CALL_INFO: return tdx_get_td_vm_call_info(vcpu); + case TDVMCALL_GET_QUOTE: + return tdx_get_quote(vcpu); + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + return tdx_setup_event_notify_interrupt(vcpu); default: break; } - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED); return 1; } @@ -2172,25 +2269,26 @@ static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd) const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf; struct kvm_tdx_capabilities __user *user_caps; struct kvm_tdx_capabilities *caps = NULL; + u32 nr_user_entries; int ret = 0; /* flags is reserved for future use */ if (cmd->flags) return -EINVAL; - caps = kmalloc(sizeof(*caps) + + caps = kzalloc(sizeof(*caps) + sizeof(struct kvm_cpuid_entry2) * td_conf->num_cpuid_config, GFP_KERNEL); if (!caps) return -ENOMEM; user_caps = u64_to_user_ptr(cmd->data); - if (copy_from_user(caps, user_caps, sizeof(*caps))) { + if (get_user(nr_user_entries, &user_caps->cpuid.nent)) { ret = -EFAULT; goto out; } - if (caps->cpuid.nent < td_conf->num_cpuid_config) { + if (nr_user_entries < td_conf->num_cpuid_config) { ret = -E2BIG; goto out; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4953846cb30d..191a9ed0da22 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7291,7 +7291,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&cpu_buf_vm_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) - mds_clear_cpu_buffers(); + x86_clear_cpu_buffers(); vmx_disable_fb_clear(vmx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b58a74c1722d..93636f77c42d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ - if (kvm_caps.has_tsc_control) + if (kvm_caps.has_tsc_control) { tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz, v->arch.l1_tsc_scaling_ratio); + tgt_tsc_khz = tgt_tsc_khz ? : 1; + } if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, @@ -6186,6 +6188,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, u32 user_tsc_khz; r = -EINVAL; + + if (vcpu->arch.guest_tsc_protected) + goto out; + user_tsc_khz = (u32)arg; if (kvm_caps.has_tsc_control && @@ -11035,7 +11041,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(vcpu->arch.switch_db_regs && !(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); set_debugreg(vcpu->arch.eff_db[0], 0); set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); @@ -11044,7 +11050,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); } vcpu->arch.host_debugctl = get_debugctlmsr(); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 38b33cdd4232..d6b2a665b499 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1526,7 +1526,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports, sched_poll.nr_ports * sizeof(*ports), &e)) { *r = -EFAULT; - return true; + goto out; } for (i = 0; i < sched_poll.nr_ports; i++) { @@ -1571,7 +1571,8 @@ out: static void cancel_evtchn_poll(struct timer_list *t) { - struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer); + struct kvm_vcpu *vcpu = timer_container_of(vcpu, t, + arch.xen.poll_timer); kvm_make_request(KVM_REQ_UNBLOCK, vcpu); kvm_vcpu_kick(vcpu); @@ -1970,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) - return -EINVAL; + /* + * Don't check for the port being within range of max_evtchn_port(). + * Userspace can configure what ever targets it likes; events just won't + * be delivered if/while the target is invalid, just like userspace can + * configure MSIs which target non-existent APICs. + * + * This allow on Live Migration and Live Update, the IRQ routing table + * can be restored *independently* of other things like creating vCPUs, + * without imposing an ordering dependency on userspace. In this + * particular case, the problematic ordering would be with setting the + * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096 + * instead of 1024 event channels. + */ /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 607d6a2e66e2..8a34fff6ab2b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -30,7 +30,6 @@ #include <linux/initrd.h> #include <linux/cpumask.h> #include <linux/gfp.h> -#include <linux/execmem.h> #include <asm/asm.h> #include <asm/bios_ebda.h> @@ -749,8 +748,6 @@ void mark_rodata_ro(void) pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); - execmem_cache_make_ro(); - kernel_set_to_readonly = 1; #ifdef CONFIG_CPA_DEBUG diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ee66fae9ebcc..fdb6cab524f0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -34,7 +34,6 @@ #include <linux/gfp.h> #include <linux/kcore.h> #include <linux/bootmem_info.h> -#include <linux/execmem.h> #include <asm/processor.h> #include <asm/bios_ebda.h> @@ -1392,8 +1391,6 @@ void mark_rodata_ro(void) (end - start) >> 10); set_memory_ro(start, (end - start) >> PAGE_SHIFT); - execmem_cache_make_ro(); - kernel_set_to_readonly = 1; /* diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 46edc11726b7..8834c76f91c9 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, pgprot_t pgprot; int i = 0; + if (!cpu_feature_enabled(X86_FEATURE_PSE)) + return 0; + addr &= PMD_MASK; pte = pte_offset_kernel(pmd, addr); first = *pte; diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 190299834011..c0c40b67524e 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -98,6 +98,11 @@ void __init pti_check_boottime_disable(void) return; setup_force_cpu_cap(X86_FEATURE_PTI); + + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + pr_debug("PTI enabled, disabling INVLPGB\n"); + setup_clear_cpu_cap(X86_FEATURE_INVLPGB); + } } static int __init pti_parse_cmdline(char *arg) diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 4933fb337983..c1efd5b0d198 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -8,13 +8,13 @@ obj-$(CONFIG_PCI_OLPC) += olpc.o obj-$(CONFIG_PCI_XEN) += xen.o obj-y += fixup.o -obj-$(CONFIG_X86_INTEL_CE) += ce4100.o obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o -obj-$(CONFIG_X86_NUMACHIP) += numachip.o +obj-$(CONFIG_X86_INTEL_CE) += ce4100.o +obj-$(CONFIG_X86_INTEL_MID) += intel_mid.o -obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o +obj-$(CONFIG_X86_NUMACHIP) += numachip.o obj-y += common.o early.o obj-y += bus_numa.o diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid.c index b433b1753016..b433b1753016 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid.c diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index a7c23f2a58c9..a2294c1649f6 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -192,7 +192,8 @@ out: int arch_resume_nosmt(void) { - int ret = 0; + int ret; + /* * We reached this while coming out of hibernation. This means * that SMT siblings are sleeping in hlt, as mwait is not safe @@ -206,18 +207,10 @@ int arch_resume_nosmt(void) * Called with hotplug disabled. */ cpu_hotplug_enable(); - if (cpu_smt_control == CPU_SMT_DISABLED || - cpu_smt_control == CPU_SMT_FORCE_DISABLED) { - enum cpuhp_smt_control old = cpu_smt_control; - - ret = cpuhp_smt_enable(); - if (ret) - goto out; - ret = cpuhp_smt_disable(old); - if (ret) - goto out; - } -out: + + ret = arch_cpu_rescan_dead_smt_siblings(); + cpu_hotplug_disable(); + return ret; } diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h index b07824500363..ddc144657efa 100644 --- a/arch/x86/um/asm/checksum.h +++ b/arch/x86/um/asm/checksum.h @@ -20,6 +20,9 @@ */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); +/* Do not call this directly. Declared for export type visibility. */ +extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len); + /** * csum_fold - Fold and invert a 32bit checksum. * sum: 32bit unfolded sum diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 478710384b34..e222d2ae28fd 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -21,10 +21,10 @@ #include <asm/user.h> -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static __always_inline void rep_nop(void) +/* PAUSE is a good thing to insert into busy-wait loops. */ +static __always_inline void native_pause(void) { - __asm__ __volatile__("rep;nop": : :"memory"); + __asm__ __volatile__("pause": : :"memory"); } static __always_inline void cpu_relax(void) @@ -33,7 +33,7 @@ static __always_inline void cpu_relax(void) time_travel_mode == TT_MODE_EXTERNAL) time_travel_ndelay(1); else - rep_nop(); + native_pause(); } #define task_pt_regs(t) (&(t)->thread.regs) diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c index 37decaa74761..a21403df6663 100644 --- a/arch/x86/um/os-Linux/mcontext.c +++ b/arch/x86/um/os-Linux/mcontext.c @@ -1,7 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -#include <sys/ucontext.h> #define __FRAME_OFFSETS +#include <linux/errno.h> +#include <linux/string.h> +#include <sys/ucontext.h> #include <asm/ptrace.h> +#include <asm/sigcontext.h> #include <sysdep/ptrace.h> #include <sysdep/mcontext.h> #include <arch.h> @@ -18,6 +21,10 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) COPY2(UESP, ESP); /* sic */ COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX); COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS); +#undef COPY2 +#undef COPY +#undef COPY_SEG +#undef COPY_SEG_CPL3 #else #define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y] #define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X] @@ -29,6 +36,8 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc) COPY2(EFLAGS, EFL); COPY2(CS, CSGSFS); regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48; +#undef COPY2 +#undef COPY #endif } @@ -42,3 +51,210 @@ void mc_set_rip(void *_mc, void *target) mc->gregs[REG_RIP] = (unsigned long)target; #endif } + +/* Same thing, but the copy macros are turned around. */ +void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc, int single_stepping) +{ +#ifdef __i386__ +#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X] +#define COPY(X) mc->gregs[REG_##X] = regs->gp[X] +#define COPY_SEG(X) mc->gregs[REG_##X] = regs->gp[X] & 0xffff; +#define COPY_SEG_CPL3(X) mc->gregs[REG_##X] = (regs->gp[X] & 0xffff) | 3; + COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS); + COPY(EDI); COPY(ESI); COPY(EBP); + COPY2(UESP, ESP); /* sic */ + COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX); + COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS); +#else +#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X/sizeof(unsigned long)] +#define COPY(X) mc->gregs[REG_##X] = regs->gp[X/sizeof(unsigned long)] + COPY(R8); COPY(R9); COPY(R10); COPY(R11); + COPY(R12); COPY(R13); COPY(R14); COPY(R15); + COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX); + COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP); + COPY(RIP); + COPY2(EFLAGS, EFL); + mc->gregs[REG_CSGSFS] = mc->gregs[REG_CSGSFS] & 0xffffffffffffl; + mc->gregs[REG_CSGSFS] |= (regs->gp[SS / sizeof(unsigned long)] & 0xffff) << 48; +#endif + + if (single_stepping) + mc->gregs[REG_EFL] |= X86_EFLAGS_TF; + else + mc->gregs[REG_EFL] &= ~X86_EFLAGS_TF; +} + +#ifdef CONFIG_X86_32 +struct _xstate_64 { + struct _fpstate_64 fpstate; + struct _header xstate_hdr; + struct _ymmh_state ymmh; + /* New processor state extensions go here: */ +}; + +/* Not quite the right structures as these contain more information */ +int um_i387_from_fxsr(struct _fpstate_32 *i387, + const struct _fpstate_64 *fxsave); +int um_fxsr_from_i387(struct _fpstate_64 *fxsave, + const struct _fpstate_32 *from); +#else +#define _xstate_64 _xstate +#endif + +static struct _fpstate *get_fpstate(struct stub_data *data, + mcontext_t *mcontext, + int *fp_size) +{ + struct _fpstate *res; + + /* Assume floating point registers are on the same page */ + res = (void *)(((unsigned long)mcontext->fpregs & + (UM_KERN_PAGE_SIZE - 1)) + + (unsigned long)&data->sigstack[0]); + + if ((void *)res + sizeof(struct _fpstate) > + (void *)data->sigstack + sizeof(data->sigstack)) + return NULL; + + if (res->sw_reserved.magic1 != FP_XSTATE_MAGIC1) { + *fp_size = sizeof(struct _fpstate); + } else { + char *magic2_addr; + + magic2_addr = (void *)res; + magic2_addr += res->sw_reserved.extended_size; + magic2_addr -= FP_XSTATE_MAGIC2_SIZE; + + /* We still need to be within our stack */ + if ((void *)magic2_addr > + (void *)data->sigstack + sizeof(data->sigstack)) + return NULL; + + /* If we do not read MAGIC2, then we did something wrong */ + if (*(__u32 *)magic2_addr != FP_XSTATE_MAGIC2) + return NULL; + + /* Remove MAGIC2 from the size, we do not save/restore it */ + *fp_size = res->sw_reserved.extended_size - + FP_XSTATE_MAGIC2_SIZE; + } + + return res; +} + +int get_stub_state(struct uml_pt_regs *regs, struct stub_data *data, + unsigned long *fp_size_out) +{ + mcontext_t *mcontext; + struct _fpstate *fpstate_stub; + struct _xstate_64 *xstate_stub; + int fp_size, xstate_size; + + /* mctx_offset is verified by wait_stub_done_seccomp */ + mcontext = (void *)&data->sigstack[data->mctx_offset]; + + get_regs_from_mc(regs, mcontext); + + fpstate_stub = get_fpstate(data, mcontext, &fp_size); + if (!fpstate_stub) + return -EINVAL; + +#ifdef CONFIG_X86_32 + xstate_stub = (void *)&fpstate_stub->_fxsr_env; + xstate_size = fp_size - offsetof(struct _fpstate_32, _fxsr_env); +#else + xstate_stub = (void *)fpstate_stub; + xstate_size = fp_size; +#endif + + if (fp_size_out) + *fp_size_out = xstate_size; + + if (xstate_size > host_fp_size) + return -ENOSPC; + + memcpy(®s->fp, xstate_stub, xstate_size); + + /* We do not need to read the x86_64 FS_BASE/GS_BASE registers as + * we do not permit userspace to set them directly. + */ + +#ifdef CONFIG_X86_32 + /* Read the i387 legacy FP registers */ + if (um_fxsr_from_i387((void *)®s->fp, fpstate_stub)) + return -EINVAL; +#endif + + return 0; +} + +/* Copied because we cannot include regset.h here. */ +struct task_struct; +struct user_regset; +struct membuf { + void *p; + size_t left; +}; + +int fpregs_legacy_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to); + +int set_stub_state(struct uml_pt_regs *regs, struct stub_data *data, + int single_stepping) +{ + mcontext_t *mcontext; + struct _fpstate *fpstate_stub; + struct _xstate_64 *xstate_stub; + int fp_size, xstate_size; + + /* mctx_offset is verified by wait_stub_done_seccomp */ + mcontext = (void *)&data->sigstack[data->mctx_offset]; + + if ((unsigned long)mcontext < (unsigned long)data->sigstack || + (unsigned long)mcontext > + (unsigned long) data->sigstack + + sizeof(data->sigstack) - sizeof(*mcontext)) + return -EINVAL; + + get_mc_from_regs(regs, mcontext, single_stepping); + + fpstate_stub = get_fpstate(data, mcontext, &fp_size); + if (!fpstate_stub) + return -EINVAL; + +#ifdef CONFIG_X86_32 + xstate_stub = (void *)&fpstate_stub->_fxsr_env; + xstate_size = fp_size - offsetof(struct _fpstate_32, _fxsr_env); +#else + xstate_stub = (void *)fpstate_stub; + xstate_size = fp_size; +#endif + + memcpy(xstate_stub, ®s->fp, xstate_size); + +#ifdef __i386__ + /* + * On x86, the GDT entries are updated by arch_set_tls. + */ + + /* Store the i387 legacy FP registers which the host will use */ + if (um_i387_from_fxsr(fpstate_stub, (void *)®s->fp)) + return -EINVAL; +#else + /* + * On x86_64, we need to sync the FS_BASE/GS_BASE registers using the + * arch specific data. + */ + if (data->arch_data.fs_base != regs->gp[FS_BASE / sizeof(unsigned long)]) { + data->arch_data.fs_base = regs->gp[FS_BASE / sizeof(unsigned long)]; + data->arch_data.sync |= STUB_SYNC_FS_BASE; + } + if (data->arch_data.gs_base != regs->gp[GS_BASE / sizeof(unsigned long)]) { + data->arch_data.gs_base = regs->gp[GS_BASE / sizeof(unsigned long)]; + data->arch_data.sync |= STUB_SYNC_GS_BASE; + } +#endif + + return 0; +} diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c index 57c504fd5626..fae8aabad10f 100644 --- a/arch/x86/um/ptrace.c +++ b/arch/x86/um/ptrace.c @@ -25,7 +25,8 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) return tmp; } -static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) +static inline unsigned long +twd_fxsr_to_i387(const struct user_fxsr_struct *fxsave) { struct _fpxreg *st = NULL; unsigned long twd = (unsigned long) fxsave->twd; @@ -69,12 +70,16 @@ static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) return ret; } -/* Get/set the old 32bit i387 registers (pre-FPX) */ -static int fpregs_legacy_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) +/* + * Get/set the old 32bit i387 registers (pre-FPX) + * + * We provide simple wrappers for mcontext.c, they are only defined locally + * because mcontext.c is userspace facing and needs to a different definition + * of the structures. + */ +static int _um_i387_from_fxsr(struct membuf to, + const struct user_fxsr_struct *fxsave) { - struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; int i; membuf_store(&to, (unsigned long)fxsave->cwd | 0xffff0000ul); @@ -91,23 +96,36 @@ static int fpregs_legacy_get(struct task_struct *target, return 0; } -static int fpregs_legacy_set(struct task_struct *target, +int um_i387_from_fxsr(struct user_i387_struct *i387, + const struct user_fxsr_struct *fxsave); + +int um_i387_from_fxsr(struct user_i387_struct *i387, + const struct user_fxsr_struct *fxsave) +{ + struct membuf to = { + .p = i387, + .left = sizeof(*i387), + }; + + return _um_i387_from_fxsr(to, fxsave); +} + +static int fpregs_legacy_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + struct membuf to) { struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; - const struct user_i387_struct *from; - struct user_i387_struct buf; - int i; - if (ubuf) { - if (copy_from_user(&buf, ubuf, sizeof(buf))) - return -EFAULT; - from = &buf; - } else { - from = kbuf; - } + return _um_i387_from_fxsr(to, fxsave); +} + +int um_fxsr_from_i387(struct user_fxsr_struct *fxsave, + const struct user_i387_struct *from); + +int um_fxsr_from_i387(struct user_fxsr_struct *fxsave, + const struct user_i387_struct *from) +{ + int i; fxsave->cwd = (unsigned short)(from->cwd & 0xffff); fxsave->swd = (unsigned short)(from->swd & 0xffff); @@ -125,6 +143,26 @@ static int fpregs_legacy_set(struct task_struct *target, return 0; } + +static int fpregs_legacy_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; + const struct user_i387_struct *from; + struct user_i387_struct buf; + + if (ubuf) { + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + from = &buf; + } else { + from = kbuf; + } + + return um_fxsr_from_i387(fxsave, from); +} #endif static int genregs_get(struct task_struct *target, diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 48de3a71f845..6fd1ed400399 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -4,7 +4,9 @@ #include <linux/elf.h> #include <linux/crypto.h> #include <linux/kbuild.h> +#include <linux/audit.h> #include <asm/mman.h> +#include <asm/seccomp.h> /* workaround for a warning with -Wmissing-prototypes */ void foo(void); diff --git a/arch/x86/um/shared/sysdep/mcontext.h b/arch/x86/um/shared/sysdep/mcontext.h index b724c54da316..6fe490cc5b98 100644 --- a/arch/x86/um/shared/sysdep/mcontext.h +++ b/arch/x86/um/shared/sysdep/mcontext.h @@ -6,7 +6,16 @@ #ifndef __SYS_SIGCONTEXT_X86_H #define __SYS_SIGCONTEXT_X86_H +#include <stub-data.h> + extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *); +extern void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc, + int single_stepping); + +extern int get_stub_state(struct uml_pt_regs *regs, struct stub_data *data, + unsigned long *fp_size_out); +extern int set_stub_state(struct uml_pt_regs *regs, struct stub_data *data, + int single_stepping); #ifdef __i386__ diff --git a/arch/x86/um/shared/sysdep/stub-data.h b/arch/x86/um/shared/sysdep/stub-data.h new file mode 100644 index 000000000000..82b1b7f8ac3d --- /dev/null +++ b/arch/x86/um/shared/sysdep/stub-data.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_STUB_DATA_H +#define __ARCH_STUB_DATA_H + +#ifdef __i386__ +#include <generated/asm-offsets.h> +#include <asm/ldt.h> + +struct stub_data_arch { + int sync; + struct user_desc tls[UM_KERN_GDT_ENTRY_TLS_ENTRIES]; +}; +#else +#define STUB_SYNC_FS_BASE (1 << 0) +#define STUB_SYNC_GS_BASE (1 << 1) +struct stub_data_arch { + int sync; + unsigned long fs_base; + unsigned long gs_base; +}; +#endif + +#endif /* __ARCH_STUB_DATA_H */ diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h index dc89f4423454..4fa58f5b4fca 100644 --- a/arch/x86/um/shared/sysdep/stub.h +++ b/arch/x86/um/shared/sysdep/stub.h @@ -13,3 +13,5 @@ extern void stub_segv_handler(int, siginfo_t *, void *); extern void stub_syscall_handler(void); +extern void stub_signal_interrupt(int, siginfo_t *, void *); +extern void stub_signal_restorer(void); diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index 390988132c0a..df568fc3ceb4 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -131,4 +131,17 @@ static __always_inline void *get_stub_data(void) "call *%%eax ;" \ :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ "i" (&fn)) + +static __always_inline void +stub_seccomp_restore_state(struct stub_data_arch *arch) +{ + for (int i = 0; i < sizeof(arch->tls) / sizeof(arch->tls[0]); i++) { + if (arch->sync & (1 << i)) + stub_syscall1(__NR_set_thread_area, + (unsigned long) &arch->tls[i]); + } + + arch->sync = 0; +} + #endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index 294affbec742..9cfd31afa769 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -10,6 +10,7 @@ #include <sysdep/ptrace_user.h> #include <generated/asm-offsets.h> #include <linux/stddef.h> +#include <asm/prctl.h> #define STUB_MMAP_NR __NR_mmap #define MMAP_OFFSET(o) (o) @@ -134,4 +135,20 @@ static __always_inline void *get_stub_data(void) "call *%%rax ;" \ :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ "i" (&fn)) + +static __always_inline void +stub_seccomp_restore_state(struct stub_data_arch *arch) +{ + /* + * We could use _writefsbase_u64/_writegsbase_u64 if the host reports + * support in the hwcaps (HWCAP2_FSGSBASE). + */ + if (arch->sync & STUB_SYNC_FS_BASE) + stub_syscall2(__NR_arch_prctl, ARCH_SET_FS, arch->fs_base); + if (arch->sync & STUB_SYNC_GS_BASE) + stub_syscall2(__NR_arch_prctl, ARCH_SET_GS, arch->gs_base); + + arch->sync = 0; +} + #endif diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index fbb129023080..cb3f17627d16 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -12,6 +12,7 @@ #include <skas.h> #include <sysdep/tls.h> #include <asm/desc.h> +#include <stub-data.h> /* * If needed we can detect when it's uninitialized. @@ -21,14 +22,25 @@ static int host_supports_tls = -1; int host_gdt_entry_tls_min; -static int do_set_thread_area(struct user_desc *info) +static int do_set_thread_area(struct task_struct* task, struct user_desc *info) { int ret; - u32 cpu; - cpu = get_cpu(); - ret = os_set_thread_area(info, userspace_pid[cpu]); - put_cpu(); + if (info->entry_number < host_gdt_entry_tls_min || + info->entry_number >= host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES) + return -EINVAL; + + if (using_seccomp) { + int idx = info->entry_number - host_gdt_entry_tls_min; + struct stub_data *data = (void *)task->mm->context.id.stack; + + data->arch_data.tls[idx] = *info; + data->arch_data.sync |= BIT(idx); + + return 0; + } + + ret = os_set_thread_area(info, task->mm->context.id.pid); if (ret) printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, " @@ -97,7 +109,7 @@ static int load_TLS(int flags, struct task_struct *to) if (!(flags & O_FORCE) && curr->flushed) continue; - ret = do_set_thread_area(&curr->tls); + ret = do_set_thread_area(current, &curr->tls); if (ret) goto out; @@ -275,7 +287,7 @@ SYSCALL_DEFINE1(set_thread_area, struct user_desc __user *, user_desc) return -EFAULT; } - ret = do_set_thread_area(&info); + ret = do_set_thread_area(current, &info); if (ret) return ret; return set_tls_entry(current, &info, idx, 1); diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 2457d13c3f9e..c7a9a087ccaf 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -75,8 +75,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err, args->r9, args->r10, args->r11); } -static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func, - u64 fn, struct tdx_module_args *args) +static __always_inline int sc_retry_prerr(sc_func_t func, + sc_err_func_t err_func, + u64 fn, struct tdx_module_args *args) { u64 sret = sc_retry(func, fn, args); diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index cb1725c40e36..d62aa1c316fc 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index f28b8e3d717e..d3ef0407401f 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -3,7 +3,7 @@ # Makefile for the Linux/Xtensa kernel. # -extra-y := vmlinux.lds +always-$(KBUILD_BUILTIN) := vmlinux.lds obj-y := head.o align.o coprocessor.o entry.o irq.o platform.o process.o \ ptrace.o setup.o signal.o stacktrace.o syscall.o time.o traps.o \ diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index c6d8c62695e1..f0a63b2f85cc 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -338,7 +338,7 @@ static int iss_net_poll(struct iss_net_private *lp) static void iss_net_timer(struct timer_list *t) { - struct iss_net_private *lp = from_timer(lp, t, timer); + struct iss_net_private *lp = timer_container_of(lp, t, timer); iss_net_poll(lp); mod_timer(&lp->timer, jiffies + lp->timer_val); |