Diffstat (limited to 'arch/arm64/kvm/pkvm.c')
-rw-r--r--  arch/arm64/kvm/pkvm.c  278
1 file changed, 241 insertions(+), 37 deletions(-)
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index b7be96a53597..0f89157d31fd 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>
@@ -110,6 +111,29 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
host_kvm->arch.pkvm.handle = 0;
free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+ free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
+}
+
+static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
+{
+ size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
+ pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
+ void *hyp_vcpu;
+ int ret;
+
+ vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+
+ hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
+ if (!hyp_vcpu)
+ return -ENOMEM;
+
+ ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
+ if (!ret)
+ vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
+ else
+ free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
+
+ return ret;
}
/*
@@ -124,11 +148,8 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
*/
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
- size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
- struct kvm_vcpu *host_vcpu;
- pkvm_handle_t handle;
+ size_t pgd_sz, hyp_vm_sz;
void *pgd, *hyp_vm;
- unsigned long idx;
int ret;
if (host_kvm->created_vcpus < 1)
@@ -160,40 +181,11 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
if (ret < 0)
goto free_vm;
- handle = ret;
-
- host_kvm->arch.pkvm.handle = handle;
-
- /* Donate memory for the vcpus at hyp and initialize it. */
- hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
- kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
- void *hyp_vcpu;
-
- /* Indexing of the vcpus to be sequential starting at 0. */
- if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
- ret = -EINVAL;
- goto destroy_vm;
- }
-
- hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
- if (!hyp_vcpu) {
- ret = -ENOMEM;
- goto destroy_vm;
- }
-
- ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
- hyp_vcpu);
- if (ret) {
- free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
- goto destroy_vm;
- }
- }
+ host_kvm->arch.pkvm.handle = ret;
+ host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+ kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);
return 0;
-
-destroy_vm:
- __pkvm_destroy_hyp_vm(host_kvm);
- return ret;
free_vm:
free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
@@ -213,6 +205,18 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm)
return ret;
}
+int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
+{
+ int ret = 0;
+
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
+ ret = __pkvm_create_hyp_vcpu(vcpu);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+ return ret;
+}
+
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
mutex_lock(&host_kvm->arch.config_lock);
@@ -222,7 +226,6 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
int pkvm_init_host_vm(struct kvm *host_kvm)
{
- mutex_init(&host_kvm->lock);
return 0;
}
@@ -259,6 +262,7 @@ static int __init finalize_pkvm(void)
* at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
ret = pkvm_drop_host_privileges();
@@ -268,3 +272,203 @@ static int __init finalize_pkvm(void)
return ret;
}
device_initcall_sync(finalize_pkvm);
+
+static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
+{
+ struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
+ struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);
+
+ if (a->gfn < b->gfn)
+ return -1;
+ if (a->gfn > b->gfn)
+ return 1;
+ return 0;
+}
+
+static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
+{
+ struct rb_node *node = root->rb_node, *prev = NULL;
+ struct pkvm_mapping *mapping;
+
+ while (node) {
+ mapping = rb_entry(node, struct pkvm_mapping, node);
+ if (mapping->gfn == gfn)
+ return node;
+ prev = node;
+ node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
+ }
+
+ return prev;
+}
+
+/*
+ * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
+ * of __map inline.
+ */
+#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map) \
+ for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings, \
+ ((__start) >> PAGE_SHIFT)); \
+ __tmp && ({ \
+ __map = rb_entry(__tmp, struct pkvm_mapping, node); \
+ __tmp = rb_next(__tmp); \
+ true; \
+ }); \
+ ) \
+ if (__map->gfn < ((__start) >> PAGE_SHIFT)) \
+ continue; \
+ else if (__map->gfn >= ((__end) >> PAGE_SHIFT)) \
+ break; \
+ else
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
+{
+ pgt->pkvm_mappings = RB_ROOT;
+ pgt->mmu = mmu;
+
+ return 0;
+}
+
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ pkvm_handle_t handle = kvm->arch.pkvm.handle;
+ struct pkvm_mapping *mapping;
+ struct rb_node *node;
+
+ if (!handle)
+ return;
+
+ node = rb_first(&pgt->pkvm_mappings);
+ while (node) {
+ mapping = rb_entry(node, struct pkvm_mapping, node);
+ kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
+ node = rb_next(node);
+ rb_erase(&mapping->node, &pgt->pkvm_mappings);
+ kfree(mapping);
+ }
+}
+
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ u64 phys, enum kvm_pgtable_prot prot,
+ void *mc, enum kvm_pgtable_walk_flags flags)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ struct pkvm_mapping *mapping = NULL;
+ struct kvm_hyp_memcache *cache = mc;
+ u64 gfn = addr >> PAGE_SHIFT;
+ u64 pfn = phys >> PAGE_SHIFT;
+ int ret;
+
+ if (size != PAGE_SIZE)
+ return -EINVAL;
+
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
+ if (ret) {
+ /* Is the gfn already mapped due to a racing vCPU? */
+ if (ret == -EPERM)
+ return -EAGAIN;
+ }
+
+ swap(mapping, cache->mapping);
+ mapping->gfn = gfn;
+ mapping->pfn = pfn;
+ WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));
+
+ return ret;
+}
+
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ pkvm_handle_t handle = kvm->arch.pkvm.handle;
+ struct pkvm_mapping *mapping;
+ int ret = 0;
+
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
+ if (WARN_ON(ret))
+ break;
+ rb_erase(&mapping->node, &pgt->pkvm_mappings);
+ kfree(mapping);
+ }
+
+ return ret;
+}
+
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ pkvm_handle_t handle = kvm->arch.pkvm.handle;
+ struct pkvm_mapping *mapping;
+ int ret = 0;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
+ if (WARN_ON(ret))
+ break;
+ }
+
+ return ret;
+}
+
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ struct pkvm_mapping *mapping;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+ __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);
+
+ return 0;
+}
+
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ pkvm_handle_t handle = kvm->arch.pkvm.handle;
+ struct pkvm_mapping *mapping;
+ bool young = false;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+ young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
+ mkold);
+
+ return young;
+}
+
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags)
+{
+ return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
+}
+
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags)
+{
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
+}
+
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
+{
+ WARN_ON_ONCE(1);
+}
+
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+ enum kvm_pgtable_prot prot, void *mc, bool force_pte)
+{
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc)
+{
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
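
The for_each_mapping_in_range_safe() macro added above advances __tmp with rb_next() before the loop body runs, which is what lets the body rb_erase() and kfree() the current entry, as pkvm_pgtable_stage2_unmap() and pkvm_pgtable_stage2_destroy() do. Below is a minimal stand-alone sketch of that advance-before-visit pattern, using a plain singly linked list and illustrative names rather than the kernel rbtree; it is not part of this patch.

/*
 * Minimal userspace analogue of the advance-before-visit pattern used by
 * for_each_mapping_in_range_safe(): the "next" pointer is read before the
 * loop body runs, so the body may free the current node. All names here
 * are illustrative and not part of the kernel patch.
 */
#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned long gfn;
	struct node *next;
};

/* Same shape as the kernel macro: cache the successor, then yield the node. */
#define for_each_node_safe(__head, __n)					\
	for (struct node *__tmp = (__head);				\
	     __tmp && ({ __n = __tmp; __tmp = __tmp->next; 1; }); )

int main(void)
{
	struct node *head = NULL, *n;

	/* Build a small list with gfn 0..4. */
	for (unsigned long g = 5; g-- > 0; ) {
		n = malloc(sizeof(*n));
		if (!n)
			return 1;
		n->gfn = g;
		n->next = head;
		head = n;
	}

	/* Freeing 'n' in the body is safe: __tmp already points past it. */
	for_each_node_safe(head, n) {
		printf("dropping gfn %lu\n", n->gfn);
		free(n);
	}

	return 0;
}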
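
pkvm_pgtable_stage2_map() also leans on the rb_find_add() convention: a NULL return means the node was inserted, while a non-NULL return is the node already present with an equal key and the tree is left unchanged, so the WARN_ON() there only fires if a second mapping for the same gfn is ever added. A small hedged sketch of a helper built on that convention follows; the helper name is illustrative and not from the patch, and it assumes the same file scope as cmp_mappings() above.

/*
 * Illustrative only: returns true if 'm' was inserted, false if a mapping
 * with the same gfn already existed (rb_find_add() then returns the
 * existing node and inserts nothing).
 */
static bool pkvm_mapping_insert(struct rb_root *root, struct pkvm_mapping *m)
{
	return rb_find_add(&m->node, root, cmp_mappings) == NULL;
}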