diff options
Diffstat (limited to 'arch/arm64/include')
-rw-r--r-- | arch/arm64/include/asm/archrandom.h | 48 | ||||
-rw-r--r-- | arch/arm64/include/asm/cputype.h | 2 | ||||
-rw-r--r-- | arch/arm64/include/asm/efi.h | 19 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_arm.h | 8 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_asm.h | 7 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 76 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_hyp.h | 3 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_mmu.h | 2 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_pgtable.h | 175 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_pkvm.h | 38 | ||||
-rw-r--r-- | arch/arm64/include/asm/mte.h | 65 | ||||
-rw-r--r-- | arch/arm64/include/asm/pgtable.h | 10 | ||||
-rw-r--r-- | arch/arm64/include/asm/ptdump.h | 1 | ||||
-rw-r--r-- | arch/arm64/include/asm/stackprotector.h | 9 | ||||
-rw-r--r-- | arch/arm64/include/asm/syscall_wrapper.h | 2 | ||||
-rw-r--r-- | arch/arm64/include/uapi/asm/kvm.h | 1 |
16 files changed, 373 insertions, 93 deletions
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 109e2a4454be..2f5f3da34782 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -5,6 +5,7 @@ #include <linux/arm-smccc.h> #include <linux/bug.h> #include <linux/kernel.h> +#include <linux/irqflags.h> #include <asm/cpufeature.h> #define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL @@ -58,6 +59,13 @@ static inline bool __arm64_rndrrs(unsigned long *v) return ok; } +static __always_inline bool __cpu_has_rng(void) +{ + if (unlikely(!system_capabilities_finalized() && !preemptible())) + return this_cpu_has_cap(ARM64_HAS_RNG); + return cpus_have_const_cap(ARM64_HAS_RNG); +} + static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs) { /* @@ -66,7 +74,7 @@ static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t * cpufeature code and with potential scheduling between CPUs * with and without the feature. */ - if (max_longs && cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + if (max_longs && __cpu_has_rng() && __arm64_rndr(v)) return 1; return 0; } @@ -108,7 +116,7 @@ static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, s * reseeded after each invocation. This is not a 100% fit but good * enough to implement this API if no other entropy source exists. */ - if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v)) + if (__cpu_has_rng() && __arm64_rndrrs(v)) return 1; return 0; @@ -121,40 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void) return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf; } -static inline size_t __init __must_check -arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs) -{ - WARN_ON(system_state != SYSTEM_BOOTING); - - if (!max_longs) - return 0; - - if (smccc_trng_available) { - struct arm_smccc_res res; - - max_longs = min_t(size_t, 3, max_longs); - arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, max_longs * 64, &res); - if ((int)res.a0 >= 0) { - switch (max_longs) { - case 3: - *v++ = res.a1; - fallthrough; - case 2: - *v++ = res.a2; - fallthrough; - case 1: - *v++ = res.a3; - break; - } - return max_longs; - } - } - - if (__early_cpu_has_rndr() && __arm64_rndr(v)) - return 1; - - return 0; -} -#define arch_get_random_seed_longs_early arch_get_random_seed_longs_early - #endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 4b1ad810436f..4e8b66c74ea2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,7 +41,7 @@ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) #define MIDR_CPU_MODEL(imp, partnum) \ - (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d6cf535d8352..31d13a6001df 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -33,6 +33,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); ({ \ efi_virtmap_load(); \ __efi_fpsimd_begin(); \ + spin_lock(&efi_rt_lock); \ }) #undef arch_efi_call_virt @@ -41,10 +42,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_teardown() \ ({ \ + spin_unlock(&efi_rt_lock); \ __efi_fpsimd_end(); \ efi_virtmap_unload(); \ }) +extern spinlock_t efi_rt_lock; efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) @@ -84,13 +87,23 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } -#define alloc_screen_info(x...) &screen_info - -static inline void free_screen_info(struct screen_info *si) +static inline unsigned long efi_get_kimg_min_align(void) { + extern bool efi_nokaslr; + + /* + * Although relocatable kernels can fix up the misalignment with + * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are + * subtly out of sync with those recorded in the vmlinux when kaslr is + * disabled but the image required relocation anyway. Therefore retain + * 2M alignment if KASLR was explicitly disabled, even if it was not + * going to be activated to begin with. + */ + return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN; } #define EFI_ALLOC_ALIGN SZ_64K +#define EFI_ALLOC_LIMIT ((1UL << 48) - 1) /* * On ARM systems, virtually remapped UEFI runtime services are set up in two diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0..0df3fc3a0173 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -135,7 +135,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2(). + * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu. * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. @@ -340,9 +340,13 @@ * We have * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12] * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12] + * + * Always assume 52 bit PA since at this point, we don't know how many PA bits + * the page table has been set up for. This should be safe since unused address + * bits in PAR are res0. */ #define PAR_TO_HPFAR(par) \ - (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) + (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) #define ECN(x) { ESR_ELx_EC_##x, #x } diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 53035763e48e..43c3bc0f9544 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -76,6 +76,9 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, + __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] @@ -106,7 +109,7 @@ enum __kvm_host_smccc_func { #define per_cpu_ptr_nvhe_sym(sym, cpu) \ ({ \ unsigned long base, off; \ - base = kvm_arm_hyp_percpu_base[cpu]; \ + base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \ off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \ (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \ base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \ @@ -211,7 +214,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index fd34ab155d0b..35a159d131b5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -73,6 +73,63 @@ u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); +struct kvm_hyp_memcache { + phys_addr_t head; + unsigned long nr_pages; +}; + +static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, + phys_addr_t *p, + phys_addr_t (*to_pa)(void *virt)) +{ + *p = mc->head; + mc->head = to_pa(p); + mc->nr_pages++; +} + +static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, + void *(*to_va)(phys_addr_t phys)) +{ + phys_addr_t *p = to_va(mc->head); + + if (!mc->nr_pages) + return NULL; + + mc->head = *p; + mc->nr_pages--; + + return p; +} + +static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc, + unsigned long min_pages, + void *(*alloc_fn)(void *arg), + phys_addr_t (*to_pa)(void *virt), + void *arg) +{ + while (mc->nr_pages < min_pages) { + phys_addr_t *p = alloc_fn(arg); + + if (!p) + return -ENOMEM; + push_hyp_memcache(mc, p, to_pa); + } + + return 0; +} + +static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc, + void (*free_fn)(void *virt, void *arg), + void *(*to_va)(phys_addr_t phys), + void *arg) +{ + while (mc->nr_pages) + free_fn(pop_hyp_memcache(mc, to_va), arg); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc); +int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages); + struct kvm_vmid { atomic64_t id; }; @@ -115,6 +172,13 @@ struct kvm_smccc_features { unsigned long vendor_hyp_bmap; }; +typedef unsigned int pkvm_handle_t; + +struct kvm_protected_vm { + pkvm_handle_t handle; + struct kvm_hyp_memcache teardown_mc; +}; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -163,9 +227,19 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + struct { + u8 imp:4; + u8 unimp:4; + } dfr0_pmuver; /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; + + /* + * For an untrusted host VM, 'pkvm.handle' is used to lookup + * the associated pKVM instance in the hypervisor. + */ + struct kvm_protected_vm pkvm; }; struct kvm_vcpu_fault_info { @@ -925,8 +999,6 @@ int kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); - static inline bool kvm_vm_is_protected(struct kvm *kvm) { return false; diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index aa7fa2a08f06..6797eafe7890 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -123,4 +123,7 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); +extern unsigned long kvm_nvhe_sym(__icache_flags); +extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7784081088e7..e4a7e6369499 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -166,7 +166,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 3252eb50ecfe..63f81b27a4e3 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -42,6 +42,8 @@ typedef u64 kvm_pte_t; #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) +#define KVM_PHYS_INVALID (-1ULL) + static inline bool kvm_pte_valid(kvm_pte_t pte) { return pte & KVM_PTE_VALID; @@ -57,6 +59,18 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte) return pa; } +static inline kvm_pte_t kvm_phys_to_pte(u64 pa) +{ + kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) { + pa &= GENMASK(51, 48); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + } + + return pte; +} + static inline u64 kvm_granule_shift(u32 level) { /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ @@ -85,6 +99,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level) * allocation is physically contiguous. * @free_pages_exact: Free an exact number of memory pages previously * allocated by zalloc_pages_exact. + * @free_removed_table: Free a removed paging structure by unlinking and + * dropping references. * @get_page: Increment the refcount on a page. * @put_page: Decrement the refcount on a page. When the * refcount reaches 0 the page is automatically @@ -103,6 +119,7 @@ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); void* (*zalloc_pages_exact)(size_t size); void (*free_pages_exact)(void *addr, size_t size); + void (*free_removed_table)(void *addr, u32 level); void (*get_page)(void *addr); void (*put_page)(void *addr); int (*page_count)(void *addr); @@ -162,29 +179,6 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, enum kvm_pgtable_prot prot); /** - * struct kvm_pgtable - KVM page-table. - * @ia_bits: Maximum input address size, in bits. - * @start_level: Level at which the page-table walk starts. - * @pgd: Pointer to the first top-level entry of the page-table. - * @mm_ops: Memory management callbacks. - * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. - * @flags: Stage-2 page-table flags. - * @force_pte_cb: Function that returns true if page level mappings must - * be used instead of block mappings. - */ -struct kvm_pgtable { - u32 ia_bits; - u32 start_level; - kvm_pte_t *pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; - kvm_pgtable_force_pte_cb_t force_pte_cb; -}; - -/** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid * entries. @@ -192,17 +186,34 @@ struct kvm_pgtable { * children. * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their * children. + * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared + * with other software walkers. */ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_LEAF = BIT(0), KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), KVM_PGTABLE_WALK_TABLE_POST = BIT(2), + KVM_PGTABLE_WALK_SHARED = BIT(3), +}; + +struct kvm_pgtable_visit_ctx { + kvm_pte_t *ptep; + kvm_pte_t old; + void *arg; + struct kvm_pgtable_mm_ops *mm_ops; + u64 addr; + u64 end; + u32 level; + enum kvm_pgtable_walk_flags flags; }; -typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg); +typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit); + +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx) +{ + return ctx->flags & KVM_PGTABLE_WALK_SHARED; +} /** * struct kvm_pgtable_walker - Hook into a page-table walk. @@ -217,6 +228,94 @@ struct kvm_pgtable_walker { const enum kvm_pgtable_walk_flags flags; }; +/* + * RCU cannot be used in a non-kernel context such as the hyp. As such, page + * table walkers used in hyp do not call into RCU and instead use other + * synchronization mechanisms (such as a spinlock). + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + +typedef kvm_pte_t *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return pteref; +} + +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +{ + /* + * Due to the lack of RCU (or a similar protection scheme), only + * non-shared table walkers are allowed in the hypervisor. + */ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + return -EPERM; + + return 0; +} + +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return true; +} + +#else + +typedef kvm_pte_t __rcu *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); +} + +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +{ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_lock(); + + return 0; +} + +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) +{ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_unlock(); +} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return rcu_read_lock_held(); +} + +#endif + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + * @flags: Stage-2 page-table flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. + */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pteref_t pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; +}; + /** * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. * @pgt: Uninitialised page-table structure to initialise. @@ -297,6 +396,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); /** + * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD + * @vtcr: Content of the VTCR register. + * + * Return: the size (in bytes) of the stage-2 PGD + */ +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr); + +/** * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. * @mmu: S2 MMU context for this S2 translation @@ -325,6 +432,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); /** + * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure. + * @mm_ops: Memory management callbacks. + * @pgtable: Unlinked stage-2 paging structure to be freed. + * @level: Level of the stage-2 paging structure to be freed. + * + * The page-table is assumed to be unreachable by any hardware walkers prior to + * freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level); + +/** * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address at which to place the mapping. @@ -333,6 +451,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); * @prot: Permissions and attributes for the mapping. * @mc: Cache of pre-allocated and zeroed memory from which to allocate * page-table pages. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored, @size is rounded-up to * the next page boundary and @phys is rounded-down to the previous page @@ -354,7 +473,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); */ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - void *mc); + void *mc, enum kvm_pgtable_walk_flags flags); /** * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 9f4ad2a8df59..01129b0d4c68 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -9,11 +9,49 @@ #include <linux/memblock.h> #include <asm/kvm_pgtable.h> +/* Maximum number of VMs that can co-exist under pKVM. */ +#define KVM_MAX_PVMS 255 + #define HYP_MEMBLOCK_REGIONS 128 +int pkvm_init_host_vm(struct kvm *kvm); +int pkvm_create_hyp_vm(struct kvm *kvm); +void pkvm_destroy_hyp_vm(struct kvm *kvm); + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); +static inline unsigned long +hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size) +{ + unsigned long nr_pages = reg->size >> PAGE_SHIFT; + unsigned long start, end; + + start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size; + end = start + nr_pages * vmemmap_entry_size; + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); + + return end - start; +} + +static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size) +{ + unsigned long res = 0, i; + + for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { + res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i], + vmemmap_entry_size); + } + + return res >> PAGE_SHIFT; +} + +static inline unsigned long hyp_vm_table_pages(void) +{ + return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT; +} + static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { unsigned long total = 0, i; diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 760c62f8e22f..20dd06d70af5 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from, unsigned long n); int mte_save_tags(struct page *page); void mte_save_page_tags(const void *page_addr, void *tag_storage); -bool mte_restore_tags(swp_entry_t entry, struct page *page); +void mte_restore_tags(swp_entry_t entry, struct page *page); void mte_restore_page_tags(void *page_addr, const void *tag_storage); void mte_invalidate_tags(int type, pgoff_t offset); void mte_invalidate_tags_area(int type); @@ -36,6 +36,58 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 +/* simple lock to avoid multiple threads tagging the same page */ +#define PG_mte_lock PG_arch_3 + +static inline void set_page_mte_tagged(struct page *page) +{ + /* + * Ensure that the tags written prior to this function are visible + * before the page flags update. + */ + smp_wmb(); + set_bit(PG_mte_tagged, &page->flags); +} + +static inline bool page_mte_tagged(struct page *page) +{ + bool ret = test_bit(PG_mte_tagged, &page->flags); + + /* + * If the page is tagged, ensure ordering with a likely subsequent + * read of the tags. + */ + if (ret) + smp_rmb(); + return ret; +} + +/* + * Lock the page for tagging and return 'true' if the page can be tagged, + * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the + * locking only happens once for page initialisation. + * + * The page MTE lock state: + * + * Locked: PG_mte_lock && !PG_mte_tagged + * Unlocked: !PG_mte_lock || PG_mte_tagged + * + * Acquire semantics only if the page is tagged (returning 'false'). + */ +static inline bool try_page_mte_tagging(struct page *page) +{ + if (!test_and_set_bit(PG_mte_lock, &page->flags)) + return true; + + /* + * The tags are either being initialised or may have been initialised + * already. Check if the PG_mte_tagged flag has been set or wait + * otherwise. + */ + smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged)); + + return false; +} void mte_zero_clear_page_tags(void *addr); void mte_sync_tags(pte_t old_pte, pte_t pte); @@ -56,6 +108,17 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size); /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 +static inline void set_page_mte_tagged(struct page *page) +{ +} +static inline bool page_mte_tagged(struct page *page) +{ + return false; +} +static inline bool try_page_mte_tagging(struct page *page) +{ + return false; +} static inline void mte_zero_clear_page_tags(void *addr) { } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 323ee234043f..b4bbeed80fb6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -862,12 +862,12 @@ static inline bool pte_user_accessible_page(pte_t pte) static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_present(pud) && pud_user(pud); + return pud_leaf(pud) && pud_user(pud); } #endif @@ -1020,8 +1020,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) -extern int kern_addr_valid(unsigned long addr); - #ifdef CONFIG_ARM64_MTE #define __HAVE_ARCH_PREPARE_TO_SWAP @@ -1048,8 +1046,8 @@ static inline void arch_swap_invalidate_area(int type) #define __HAVE_ARCH_SWAP_RESTORE static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { - if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) - set_bit(PG_mte_tagged, &folio->flags); + if (system_supports_mte()) + mte_restore_tags(entry, &folio->page); } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index b1dd7ecff7ef..581caac525b0 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -23,6 +23,7 @@ struct ptdump_info { void ptdump_walk(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_PTDUMP_DEBUGFS +#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h index 33f1bb453150..ae3ad80f51fe 100644 --- a/arch/arm64/include/asm/stackprotector.h +++ b/arch/arm64/include/asm/stackprotector.h @@ -13,8 +13,6 @@ #ifndef __ASM_STACKPROTECTOR_H #define __ASM_STACKPROTECTOR_H -#include <linux/random.h> -#include <linux/version.h> #include <asm/pointer_auth.h> extern unsigned long __stack_chk_guard; @@ -28,12 +26,7 @@ extern unsigned long __stack_chk_guard; static __always_inline void boot_init_stack_canary(void) { #if defined(CONFIG_STACKPROTECTOR) - unsigned long canary; - - /* Try to get a semi random initial value. */ - get_random_bytes(&canary, sizeof(canary)); - canary ^= LINUX_VERSION_CODE; - canary &= CANARY_MASK; + unsigned long canary = get_random_canary(); current->stack_canary = canary; if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK)) diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index b383b4802a7b..d30217c21eff 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -8,7 +8,7 @@ #ifndef __ASM_SYSCALL_WRAPPER_H #define __ASM_SYSCALL_WRAPPER_H -struct pt_regs; +#include <asm/ptrace.h> #define SC_ARM64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 316917b98707..a7a857f1784d 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -43,6 +43,7 @@ #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) |