summaryrefslogtreecommitdiff
path: root/arch/arm64/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--arch/arm64/include/asm/archrandom.h48
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/efi.h19
-rw-r--r--arch/arm64/include/asm/kvm_arm.h8
-rw-r--r--arch/arm64/include/asm/kvm_asm.h7
-rw-r--r--arch/arm64/include/asm/kvm_host.h76
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h3
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h2
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h175
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h38
-rw-r--r--arch/arm64/include/asm/mte.h65
-rw-r--r--arch/arm64/include/asm/pgtable.h10
-rw-r--r--arch/arm64/include/asm/ptdump.h1
-rw-r--r--arch/arm64/include/asm/stackprotector.h9
-rw-r--r--arch/arm64/include/asm/syscall_wrapper.h2
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h1
16 files changed, 373 insertions, 93 deletions
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 109e2a4454be..2f5f3da34782 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -5,6 +5,7 @@
#include <linux/arm-smccc.h>
#include <linux/bug.h>
#include <linux/kernel.h>
+#include <linux/irqflags.h>
#include <asm/cpufeature.h>
#define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL
@@ -58,6 +59,13 @@ static inline bool __arm64_rndrrs(unsigned long *v)
return ok;
}
+static __always_inline bool __cpu_has_rng(void)
+{
+ if (unlikely(!system_capabilities_finalized() && !preemptible()))
+ return this_cpu_has_cap(ARM64_HAS_RNG);
+ return cpus_have_const_cap(ARM64_HAS_RNG);
+}
+
static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
{
/*
@@ -66,7 +74,7 @@ static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t
* cpufeature code and with potential scheduling between CPUs
* with and without the feature.
*/
- if (max_longs && cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v))
+ if (max_longs && __cpu_has_rng() && __arm64_rndr(v))
return 1;
return 0;
}
@@ -108,7 +116,7 @@ static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, s
* reseeded after each invocation. This is not a 100% fit but good
* enough to implement this API if no other entropy source exists.
*/
- if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v))
+ if (__cpu_has_rng() && __arm64_rndrrs(v))
return 1;
return 0;
@@ -121,40 +129,4 @@ static inline bool __init __early_cpu_has_rndr(void)
return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
}
-static inline size_t __init __must_check
-arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs)
-{
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (!max_longs)
- return 0;
-
- if (smccc_trng_available) {
- struct arm_smccc_res res;
-
- max_longs = min_t(size_t, 3, max_longs);
- arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, max_longs * 64, &res);
- if ((int)res.a0 >= 0) {
- switch (max_longs) {
- case 3:
- *v++ = res.a1;
- fallthrough;
- case 2:
- *v++ = res.a2;
- fallthrough;
- case 1:
- *v++ = res.a3;
- break;
- }
- return max_longs;
- }
- }
-
- if (__early_cpu_has_rndr() && __arm64_rndr(v))
- return 1;
-
- return 0;
-}
-#define arch_get_random_seed_longs_early arch_get_random_seed_longs_early
-
#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 4b1ad810436f..4e8b66c74ea2 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,7 +41,7 @@
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_CPU_MODEL(imp, partnum) \
- (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
+ ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index d6cf535d8352..31d13a6001df 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -33,6 +33,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
({ \
efi_virtmap_load(); \
__efi_fpsimd_begin(); \
+ spin_lock(&efi_rt_lock); \
})
#undef arch_efi_call_virt
@@ -41,10 +42,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_teardown() \
({ \
+ spin_unlock(&efi_rt_lock); \
__efi_fpsimd_end(); \
efi_virtmap_unload(); \
})
+extern spinlock_t efi_rt_lock;
efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
@@ -84,13 +87,23 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1));
}
-#define alloc_screen_info(x...) &screen_info
-
-static inline void free_screen_info(struct screen_info *si)
+static inline unsigned long efi_get_kimg_min_align(void)
{
+ extern bool efi_nokaslr;
+
+ /*
+ * Although relocatable kernels can fix up the misalignment with
+ * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are
+ * subtly out of sync with those recorded in the vmlinux when kaslr is
+ * disabled but the image required relocation anyway. Therefore retain
+ * 2M alignment if KASLR was explicitly disabled, even if it was not
+ * going to be activated to begin with.
+ */
+ return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
}
#define EFI_ALLOC_ALIGN SZ_64K
+#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)
/*
* On ARM systems, virtually remapped UEFI runtime services are set up in two
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 8aa8492dafc0..0df3fc3a0173 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -135,7 +135,7 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
- * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu.
*
* Note that when using 4K pages, we concatenate two first level page tables
* together. With 16K pages, we concatenate 16 first level page tables.
@@ -340,9 +340,13 @@
* We have
* PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
* HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
+ *
+ * Always assume 52 bit PA since at this point, we don't know how many PA bits
+ * the page table has been set up for. This should be safe since unused address
+ * bits in PAR are res0.
*/
#define PAR_TO_HPFAR(par) \
- (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
+ (((par) & GENMASK_ULL(52 - 1, 12)) >> 8)
#define ECN(x) { ESR_ELx_EC_##x, #x }
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 53035763e48e..43c3bc0f9544 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -76,6 +76,9 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+ __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
@@ -106,7 +109,7 @@ enum __kvm_host_smccc_func {
#define per_cpu_ptr_nvhe_sym(sym, cpu) \
({ \
unsigned long base, off; \
- base = kvm_arm_hyp_percpu_base[cpu]; \
+ base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \
off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \
(unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \
base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \
@@ -211,7 +214,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
-extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
+extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[];
DECLARE_KVM_NVHE_SYM(__per_cpu_start);
DECLARE_KVM_NVHE_SYM(__per_cpu_end);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index fd34ab155d0b..35a159d131b5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -73,6 +73,63 @@ u32 __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
+struct kvm_hyp_memcache {
+ phys_addr_t head;
+ unsigned long nr_pages;
+};
+
+static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
+ phys_addr_t *p,
+ phys_addr_t (*to_pa)(void *virt))
+{
+ *p = mc->head;
+ mc->head = to_pa(p);
+ mc->nr_pages++;
+}
+
+static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
+ void *(*to_va)(phys_addr_t phys))
+{
+ phys_addr_t *p = to_va(mc->head);
+
+ if (!mc->nr_pages)
+ return NULL;
+
+ mc->head = *p;
+ mc->nr_pages--;
+
+ return p;
+}
+
+static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
+ unsigned long min_pages,
+ void *(*alloc_fn)(void *arg),
+ phys_addr_t (*to_pa)(void *virt),
+ void *arg)
+{
+ while (mc->nr_pages < min_pages) {
+ phys_addr_t *p = alloc_fn(arg);
+
+ if (!p)
+ return -ENOMEM;
+ push_hyp_memcache(mc, p, to_pa);
+ }
+
+ return 0;
+}
+
+static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
+ void (*free_fn)(void *virt, void *arg),
+ void *(*to_va)(phys_addr_t phys),
+ void *arg)
+{
+ while (mc->nr_pages)
+ free_fn(pop_hyp_memcache(mc, to_va), arg);
+}
+
+void free_hyp_memcache(struct kvm_hyp_memcache *mc);
+int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages);
+
struct kvm_vmid {
atomic64_t id;
};
@@ -115,6 +172,13 @@ struct kvm_smccc_features {
unsigned long vendor_hyp_bmap;
};
+typedef unsigned int pkvm_handle_t;
+
+struct kvm_protected_vm {
+ pkvm_handle_t handle;
+ struct kvm_hyp_memcache teardown_mc;
+};
+
struct kvm_arch {
struct kvm_s2_mmu mmu;
@@ -163,9 +227,19 @@ struct kvm_arch {
u8 pfr0_csv2;
u8 pfr0_csv3;
+ struct {
+ u8 imp:4;
+ u8 unimp:4;
+ } dfr0_pmuver;
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
+
+ /*
+ * For an untrusted host VM, 'pkvm.handle' is used to lookup
+ * the associated pKVM instance in the hypervisor.
+ */
+ struct kvm_protected_vm pkvm;
};
struct kvm_vcpu_fault_info {
@@ -925,8 +999,6 @@ int kvm_set_ipa_limit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
-int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
-
static inline bool kvm_vm_is_protected(struct kvm *kvm)
{
return false;
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index aa7fa2a08f06..6797eafe7890 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -123,4 +123,7 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
+extern unsigned long kvm_nvhe_sym(__icache_flags);
+extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
+
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7784081088e7..e4a7e6369499 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -166,7 +166,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
-int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 3252eb50ecfe..63f81b27a4e3 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -42,6 +42,8 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
+#define KVM_PHYS_INVALID (-1ULL)
+
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
return pte & KVM_PTE_VALID;
@@ -57,6 +59,18 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
return pa;
}
+static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
+{
+ kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
+
+ if (PAGE_SHIFT == 16) {
+ pa &= GENMASK(51, 48);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+ }
+
+ return pte;
+}
+
static inline u64 kvm_granule_shift(u32 level)
{
/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
@@ -85,6 +99,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
* allocation is physically contiguous.
* @free_pages_exact: Free an exact number of memory pages previously
* allocated by zalloc_pages_exact.
+ * @free_removed_table: Free a removed paging structure by unlinking and
+ * dropping references.
* @get_page: Increment the refcount on a page.
* @put_page: Decrement the refcount on a page. When the
* refcount reaches 0 the page is automatically
@@ -103,6 +119,7 @@ struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
void* (*zalloc_pages_exact)(size_t size);
void (*free_pages_exact)(void *addr, size_t size);
+ void (*free_removed_table)(void *addr, u32 level);
void (*get_page)(void *addr);
void (*put_page)(void *addr);
int (*page_count)(void *addr);
@@ -162,29 +179,6 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
enum kvm_pgtable_prot prot);
/**
- * struct kvm_pgtable - KVM page-table.
- * @ia_bits: Maximum input address size, in bits.
- * @start_level: Level at which the page-table walk starts.
- * @pgd: Pointer to the first top-level entry of the page-table.
- * @mm_ops: Memory management callbacks.
- * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
- * @flags: Stage-2 page-table flags.
- * @force_pte_cb: Function that returns true if page level mappings must
- * be used instead of block mappings.
- */
-struct kvm_pgtable {
- u32 ia_bits;
- u32 start_level;
- kvm_pte_t *pgd;
- struct kvm_pgtable_mm_ops *mm_ops;
-
- /* Stage-2 only */
- struct kvm_s2_mmu *mmu;
- enum kvm_pgtable_stage2_flags flags;
- kvm_pgtable_force_pte_cb_t force_pte_cb;
-};
-
-/**
* enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
* entries.
@@ -192,17 +186,34 @@ struct kvm_pgtable {
* children.
* @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
* children.
+ * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
+ * with other software walkers.
*/
enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_LEAF = BIT(0),
KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
+ KVM_PGTABLE_WALK_SHARED = BIT(3),
+};
+
+struct kvm_pgtable_visit_ctx {
+ kvm_pte_t *ptep;
+ kvm_pte_t old;
+ void *arg;
+ struct kvm_pgtable_mm_ops *mm_ops;
+ u64 addr;
+ u64 end;
+ u32 level;
+ enum kvm_pgtable_walk_flags flags;
};
-typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
- kvm_pte_t *ptep,
- enum kvm_pgtable_walk_flags flag,
- void * const arg);
+typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit);
+
+static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
+{
+ return ctx->flags & KVM_PGTABLE_WALK_SHARED;
+}
/**
* struct kvm_pgtable_walker - Hook into a page-table walk.
@@ -217,6 +228,94 @@ struct kvm_pgtable_walker {
const enum kvm_pgtable_walk_flags flags;
};
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
+typedef kvm_pte_t *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return pteref;
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ /*
+ * Due to the lack of RCU (or a similar protection scheme), only
+ * non-shared table walkers are allowed in the hypervisor.
+ */
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ return -EPERM;
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return true;
+}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_lock();
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_unlock();
+}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return rcu_read_lock_held();
+}
+
+#endif
+
+/**
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits: Maximum input address size, in bits.
+ * @start_level: Level at which the page-table walk starts.
+ * @pgd: Pointer to the first top-level entry of the page-table.
+ * @mm_ops: Memory management callbacks.
+ * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags: Stage-2 page-table flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ * be used instead of block mappings.
+ */
+struct kvm_pgtable {
+ u32 ia_bits;
+ u32 start_level;
+ kvm_pteref_t pgd;
+ struct kvm_pgtable_mm_ops *mm_ops;
+
+ /* Stage-2 only */
+ struct kvm_s2_mmu *mmu;
+ enum kvm_pgtable_stage2_flags flags;
+ kvm_pgtable_force_pte_cb_t force_pte_cb;
+};
+
/**
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
* @pgt: Uninitialised page-table structure to initialise.
@@ -297,6 +396,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
/**
+ * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
+ * @vtcr: Content of the VTCR register.
+ *
+ * Return: the size (in bytes) of the stage-2 PGD
+ */
+size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
+
+/**
* __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
* @pgt: Uninitialised page-table structure to initialise.
* @mmu: S2 MMU context for this S2 translation
@@ -325,6 +432,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
+ * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
+ * @mm_ops: Memory management callbacks.
+ * @pgtable: Unlinked stage-2 paging structure to be freed.
+ * @level: Level of the stage-2 paging structure to be freed.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior to
+ * freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+
+/**
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address at which to place the mapping.
@@ -333,6 +451,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* @prot: Permissions and attributes for the mapping.
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
* page-table pages.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored, @size is rounded-up to
* the next page boundary and @phys is rounded-down to the previous page
@@ -354,7 +473,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
*/
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
u64 phys, enum kvm_pgtable_prot prot,
- void *mc);
+ void *mc, enum kvm_pgtable_walk_flags flags);
/**
* kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 9f4ad2a8df59..01129b0d4c68 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -9,11 +9,49 @@
#include <linux/memblock.h>
#include <asm/kvm_pgtable.h>
+/* Maximum number of VMs that can co-exist under pKVM. */
+#define KVM_MAX_PVMS 255
+
#define HYP_MEMBLOCK_REGIONS 128
+int pkvm_init_host_vm(struct kvm *kvm);
+int pkvm_create_hyp_vm(struct kvm *kvm);
+void pkvm_destroy_hyp_vm(struct kvm *kvm);
+
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
+static inline unsigned long
+hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
+{
+ unsigned long nr_pages = reg->size >> PAGE_SHIFT;
+ unsigned long start, end;
+
+ start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
+ end = start + nr_pages * vmemmap_entry_size;
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = ALIGN(end, PAGE_SIZE);
+
+ return end - start;
+}
+
+static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
+{
+ unsigned long res = 0, i;
+
+ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+ res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
+ vmemmap_entry_size);
+ }
+
+ return res >> PAGE_SHIFT;
+}
+
+static inline unsigned long hyp_vm_table_pages(void)
+{
+ return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
+}
+
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
unsigned long total = 0, i;
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 760c62f8e22f..20dd06d70af5 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from,
unsigned long n);
int mte_save_tags(struct page *page);
void mte_save_page_tags(const void *page_addr, void *tag_storage);
-bool mte_restore_tags(swp_entry_t entry, struct page *page);
+void mte_restore_tags(swp_entry_t entry, struct page *page);
void mte_restore_page_tags(void *page_addr, const void *tag_storage);
void mte_invalidate_tags(int type, pgoff_t offset);
void mte_invalidate_tags_area(int type);
@@ -36,6 +36,58 @@ void mte_free_tag_storage(char *storage);
/* track which pages have valid allocation tags */
#define PG_mte_tagged PG_arch_2
+/* simple lock to avoid multiple threads tagging the same page */
+#define PG_mte_lock PG_arch_3
+
+static inline void set_page_mte_tagged(struct page *page)
+{
+ /*
+ * Ensure that the tags written prior to this function are visible
+ * before the page flags update.
+ */
+ smp_wmb();
+ set_bit(PG_mte_tagged, &page->flags);
+}
+
+static inline bool page_mte_tagged(struct page *page)
+{
+ bool ret = test_bit(PG_mte_tagged, &page->flags);
+
+ /*
+ * If the page is tagged, ensure ordering with a likely subsequent
+ * read of the tags.
+ */
+ if (ret)
+ smp_rmb();
+ return ret;
+}
+
+/*
+ * Lock the page for tagging and return 'true' if the page can be tagged,
+ * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
+ * locking only happens once for page initialisation.
+ *
+ * The page MTE lock state:
+ *
+ * Locked: PG_mte_lock && !PG_mte_tagged
+ * Unlocked: !PG_mte_lock || PG_mte_tagged
+ *
+ * Acquire semantics only if the page is tagged (returning 'false').
+ */
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ if (!test_and_set_bit(PG_mte_lock, &page->flags))
+ return true;
+
+ /*
+ * The tags are either being initialised or may have been initialised
+ * already. Check if the PG_mte_tagged flag has been set or wait
+ * otherwise.
+ */
+ smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged));
+
+ return false;
+}
void mte_zero_clear_page_tags(void *addr);
void mte_sync_tags(pte_t old_pte, pte_t pte);
@@ -56,6 +108,17 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size);
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
#define PG_mte_tagged 0
+static inline void set_page_mte_tagged(struct page *page)
+{
+}
+static inline bool page_mte_tagged(struct page *page)
+{
+ return false;
+}
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ return false;
+}
static inline void mte_zero_clear_page_tags(void *addr)
{
}
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 323ee234043f..b4bbeed80fb6 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -862,12 +862,12 @@ static inline bool pte_user_accessible_page(pte_t pte)
static inline bool pmd_user_accessible_page(pmd_t pmd)
{
- return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+ return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}
static inline bool pud_user_accessible_page(pud_t pud)
{
- return pud_present(pud) && pud_user(pud);
+ return pud_leaf(pud) && pud_user(pud);
}
#endif
@@ -1020,8 +1020,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
-extern int kern_addr_valid(unsigned long addr);
-
#ifdef CONFIG_ARM64_MTE
#define __HAVE_ARCH_PREPARE_TO_SWAP
@@ -1048,8 +1046,8 @@ static inline void arch_swap_invalidate_area(int type)
#define __HAVE_ARCH_SWAP_RESTORE
static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
- if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
- set_bit(PG_mte_tagged, &folio->flags);
+ if (system_supports_mte())
+ mte_restore_tags(entry, &folio->page);
}
#endif /* CONFIG_ARM64_MTE */
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index b1dd7ecff7ef..581caac525b0 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -23,6 +23,7 @@ struct ptdump_info {
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
#ifdef CONFIG_PTDUMP_DEBUGFS
+#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index 33f1bb453150..ae3ad80f51fe 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -13,8 +13,6 @@
#ifndef __ASM_STACKPROTECTOR_H
#define __ASM_STACKPROTECTOR_H
-#include <linux/random.h>
-#include <linux/version.h>
#include <asm/pointer_auth.h>
extern unsigned long __stack_chk_guard;
@@ -28,12 +26,7 @@ extern unsigned long __stack_chk_guard;
static __always_inline void boot_init_stack_canary(void)
{
#if defined(CONFIG_STACKPROTECTOR)
- unsigned long canary;
-
- /* Try to get a semi random initial value. */
- get_random_bytes(&canary, sizeof(canary));
- canary ^= LINUX_VERSION_CODE;
- canary &= CANARY_MASK;
+ unsigned long canary = get_random_canary();
current->stack_canary = canary;
if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index b383b4802a7b..d30217c21eff 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -8,7 +8,7 @@
#ifndef __ASM_SYSCALL_WRAPPER_H
#define __ASM_SYSCALL_WRAPPER_H
-struct pt_regs;
+#include <asm/ptrace.h>
#define SC_ARM64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 316917b98707..a7a857f1784d 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -43,6 +43,7 @@
#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))