summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/pkvm.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/pkvm.c')
-rw-r--r--arch/arm64/kvm/pkvm.c270
1 files changed, 270 insertions, 0 deletions
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
new file mode 100644
index 000000000000..b7be96a53597
--- /dev/null
+++ b/arch/arm64/kvm/pkvm.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kmemleak.h>
+#include <linux/kvm_host.h>
+#include <linux/memblock.h>
+#include <linux/mutex.h>
+#include <linux/sort.h>
+
+#include <asm/kvm_pkvm.h>
+
+#include "hyp_constants.h"
+
+DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+
+static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
+static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
+
+phys_addr_t hyp_mem_base;
+phys_addr_t hyp_mem_size;
+
+static int cmp_hyp_memblock(const void *p1, const void *p2)
+{
+ const struct memblock_region *r1 = p1;
+ const struct memblock_region *r2 = p2;
+
+ return r1->base < r2->base ? -1 : (r1->base > r2->base);
+}
+
+static void __init sort_memblock_regions(void)
+{
+ sort(hyp_memory,
+ *hyp_memblock_nr_ptr,
+ sizeof(struct memblock_region),
+ cmp_hyp_memblock,
+ NULL);
+}
+
+static int __init register_memblock_regions(void)
+{
+ struct memblock_region *reg;
+
+ for_each_mem_region(reg) {
+ if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
+ return -ENOMEM;
+
+ hyp_memory[*hyp_memblock_nr_ptr] = *reg;
+ (*hyp_memblock_nr_ptr)++;
+ }
+ sort_memblock_regions();
+
+ return 0;
+}
+
+void __init kvm_hyp_reserve(void)
+{
+ u64 hyp_mem_pages = 0;
+ int ret;
+
+ if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
+ return;
+
+ if (kvm_get_mode() != KVM_MODE_PROTECTED)
+ return;
+
+ ret = register_memblock_regions();
+ if (ret) {
+ *hyp_memblock_nr_ptr = 0;
+ kvm_err("Failed to register hyp memblocks: %d\n", ret);
+ return;
+ }
+
+ hyp_mem_pages += hyp_s1_pgtable_pages();
+ hyp_mem_pages += host_s2_pgtable_pages();
+ hyp_mem_pages += hyp_vm_table_pages();
+ hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
+ hyp_mem_pages += hyp_ffa_proxy_pages();
+
+ /*
+ * Try to allocate a PMD-aligned region to reduce TLB pressure once
+ * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
+ */
+ hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
+ hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
+ PMD_SIZE);
+ if (!hyp_mem_base)
+ hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
+ else
+ hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
+
+ if (!hyp_mem_base) {
+ kvm_err("Failed to reserve hyp memory\n");
+ return;
+ }
+
+ kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
+ hyp_mem_base);
+}
+
+static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
+{
+ if (host_kvm->arch.pkvm.handle) {
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
+ host_kvm->arch.pkvm.handle));
+ }
+
+ host_kvm->arch.pkvm.handle = 0;
+ free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
+}
+
+/*
+ * Allocates and donates memory for hypervisor VM structs at EL2.
+ *
+ * Allocates space for the VM state, which includes the hyp vm as well as
+ * the hyp vcpus.
+ *
+ * Stores an opaque handler in the kvm struct for future reference.
+ *
+ * Return 0 on success, negative error code on failure.
+ */
+static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
+{
+ size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
+ struct kvm_vcpu *host_vcpu;
+ pkvm_handle_t handle;
+ void *pgd, *hyp_vm;
+ unsigned long idx;
+ int ret;
+
+ if (host_kvm->created_vcpus < 1)
+ return -EINVAL;
+
+ pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);
+
+ /*
+ * The PGD pages will be reclaimed using a hyp_memcache which implies
+ * page granularity. So, use alloc_pages_exact() to get individual
+ * refcounts.
+ */
+ pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
+ if (!pgd)
+ return -ENOMEM;
+
+ /* Allocate memory to donate to hyp for vm and vcpu pointers. */
+ hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
+ size_mul(sizeof(void *),
+ host_kvm->created_vcpus)));
+ hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
+ if (!hyp_vm) {
+ ret = -ENOMEM;
+ goto free_pgd;
+ }
+
+ /* Donate the VM memory to hyp and let hyp initialize it. */
+ ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
+ if (ret < 0)
+ goto free_vm;
+
+ handle = ret;
+
+ host_kvm->arch.pkvm.handle = handle;
+
+ /* Donate memory for the vcpus at hyp and initialize it. */
+ hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
+ kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
+ void *hyp_vcpu;
+
+ /* Indexing of the vcpus to be sequential starting at 0. */
+ if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
+ ret = -EINVAL;
+ goto destroy_vm;
+ }
+
+ hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
+ if (!hyp_vcpu) {
+ ret = -ENOMEM;
+ goto destroy_vm;
+ }
+
+ ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
+ hyp_vcpu);
+ if (ret) {
+ free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
+ goto destroy_vm;
+ }
+ }
+
+ return 0;
+
+destroy_vm:
+ __pkvm_destroy_hyp_vm(host_kvm);
+ return ret;
+free_vm:
+ free_pages_exact(hyp_vm, hyp_vm_sz);
+free_pgd:
+ free_pages_exact(pgd, pgd_sz);
+ return ret;
+}
+
+int pkvm_create_hyp_vm(struct kvm *host_kvm)
+{
+ int ret = 0;
+
+ mutex_lock(&host_kvm->arch.config_lock);
+ if (!host_kvm->arch.pkvm.handle)
+ ret = __pkvm_create_hyp_vm(host_kvm);
+ mutex_unlock(&host_kvm->arch.config_lock);
+
+ return ret;
+}
+
+void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
+{
+ mutex_lock(&host_kvm->arch.config_lock);
+ __pkvm_destroy_hyp_vm(host_kvm);
+ mutex_unlock(&host_kvm->arch.config_lock);
+}
+
+int pkvm_init_host_vm(struct kvm *host_kvm)
+{
+ mutex_init(&host_kvm->lock);
+ return 0;
+}
+
+static void __init _kvm_host_prot_finalize(void *arg)
+{
+ int *err = arg;
+
+ if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
+ WRITE_ONCE(*err, -EINVAL);
+}
+
+static int __init pkvm_drop_host_privileges(void)
+{
+ int ret = 0;
+
+ /*
+ * Flip the static key upfront as that may no longer be possible
+ * once the host stage 2 is installed.
+ */
+ static_branch_enable(&kvm_protected_mode_initialized);
+ on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
+ return ret;
+}
+
+static int __init finalize_pkvm(void)
+{
+ int ret;
+
+ if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
+ return 0;
+
+ /*
+ * Exclude HYP sections from kmemleak so that they don't get peeked
+ * at, which would end badly once inaccessible.
+ */
+ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
+
+ ret = pkvm_drop_host_privileges();
+ if (ret)
+ pr_err("Failed to finalize Hyp protection: %d\n", ret);
+
+ return ret;
+}
+device_initcall_sync(finalize_pkvm);