summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_hv_uvmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv_uvmem.c')
-rw-r--r--arch/powerpc/kvm/book3s_hv_uvmem.c818
1 files changed, 628 insertions, 190 deletions
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 2de264fc3156..92f33115144b 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -47,7 +47,7 @@
* Locking order
*
* 1. kvm->srcu - Protects KVM memslots
- * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise
+ * 2. kvm->mm->mmap_lock - find_vma, migrate_vma_pages and helpers, ksm_madvise
* 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
* as sync-points for page-in/out
*/
@@ -90,15 +90,138 @@
#include <linux/migrate.h>
#include <linux/kvm_host.h>
#include <linux/ksm.h>
+#include <linux/of.h>
+#include <linux/memremap.h>
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_uvmem.h>
static struct dev_pagemap kvmppc_uvmem_pgmap;
static unsigned long *kvmppc_uvmem_bitmap;
static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
-#define KVMPPC_UVMEM_PFN (1UL << 63)
+/*
+ * States of a GFN
+ * ---------------
+ * The GFN can be in one of the following states.
+ *
+ * (a) Secure - The GFN is secure. The GFN is associated with
+ * a Secure VM, the contents of the GFN is not accessible
+ * to the Hypervisor. This GFN can be backed by a secure-PFN,
+ * or can be backed by a normal-PFN with contents encrypted.
+ * The former is true when the GFN is paged-in into the
+ * ultravisor. The latter is true when the GFN is paged-out
+ * of the ultravisor.
+ *
+ * (b) Shared - The GFN is shared. The GFN is associated with a
+ * a secure VM. The contents of the GFN is accessible to
+ * Hypervisor. This GFN is backed by a normal-PFN and its
+ * content is un-encrypted.
+ *
+ * (c) Normal - The GFN is a normal. The GFN is associated with
+ * a normal VM. The contents of the GFN is accessible to
+ * the Hypervisor. Its content is never encrypted.
+ *
+ * States of a VM.
+ * ---------------
+ *
+ * Normal VM: A VM whose contents are always accessible to
+ * the hypervisor. All its GFNs are normal-GFNs.
+ *
+ * Secure VM: A VM whose contents are not accessible to the
+ * hypervisor without the VM's consent. Its GFNs are
+ * either Shared-GFN or Secure-GFNs.
+ *
+ * Transient VM: A Normal VM that is transitioning to secure VM.
+ * The transition starts on successful return of
+ * H_SVM_INIT_START, and ends on successful return
+ * of H_SVM_INIT_DONE. This transient VM, can have GFNs
+ * in any of the three states; i.e Secure-GFN, Shared-GFN,
+ * and Normal-GFN. The VM never executes in this state
+ * in supervisor-mode.
+ *
+ * Memory slot State.
+ * -----------------------------
+ * The state of a memory slot mirrors the state of the
+ * VM the memory slot is associated with.
+ *
+ * VM State transition.
+ * --------------------
+ *
+ * A VM always starts in Normal Mode.
+ *
+ * H_SVM_INIT_START moves the VM into transient state. During this
+ * time the Ultravisor may request some of its GFNs to be shared or
+ * secured. So its GFNs can be in one of the three GFN states.
+ *
+ * H_SVM_INIT_DONE moves the VM entirely from transient state to
+ * secure-state. At this point any left-over normal-GFNs are
+ * transitioned to Secure-GFN.
+ *
+ * H_SVM_INIT_ABORT moves the transient VM back to normal VM.
+ * All its GFNs are moved to Normal-GFNs.
+ *
+ * UV_TERMINATE transitions the secure-VM back to normal-VM. All
+ * the secure-GFN and shared-GFNs are tranistioned to normal-GFN
+ * Note: The contents of the normal-GFN is undefined at this point.
+ *
+ * GFN state implementation:
+ * -------------------------
+ *
+ * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
+ * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
+ * set, and contains the value of the secure-PFN.
+ * It is associated with a normal-PFN; also called mem_pfn, when
+ * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
+ * The value of the normal-PFN is not tracked.
+ *
+ * Shared GFN is associated with a normal-PFN. Its pfn[] has
+ * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
+ * is not tracked.
+ *
+ * Normal GFN is associated with normal-PFN. Its pfn[] has
+ * no flag set. The value of the normal-PFN is not tracked.
+ *
+ * Life cycle of a GFN
+ * --------------------
+ *
+ * --------------------------------------------------------------
+ * | | Share | Unshare | SVM |H_SVM_INIT_DONE|
+ * | |operation |operation | abort/ | |
+ * | | | | terminate | |
+ * -------------------------------------------------------------
+ * | | | | | |
+ * | Secure | Shared | Secure |Normal |Secure |
+ * | | | | | |
+ * | Shared | Shared | Secure |Normal |Shared |
+ * | | | | | |
+ * | Normal | Shared | Secure |Normal |Secure |
+ * --------------------------------------------------------------
+ *
+ * Life cycle of a VM
+ * --------------------
+ *
+ * --------------------------------------------------------------------
+ * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ |
+ * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE |
+ * | | | | | | |
+ * --------- ----------------------------------------------------------
+ * | | | | | | |
+ * | Normal | Normal | Transient|Error |Error |Normal |
+ * | | | | | | |
+ * | Secure | Error | Error |Error |Error |Normal |
+ * | | | | | | |
+ * |Transient| N/A | Error |Secure |Normal |Normal |
+ * --------------------------------------------------------------------
+ */
+
+#define KVMPPC_GFN_UVMEM_PFN (1UL << 63)
+#define KVMPPC_GFN_MEM_PFN (1UL << 62)
+#define KVMPPC_GFN_SHARED (1UL << 61)
+#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
+#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
+#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK)
struct kvmppc_uvmem_slot {
struct list_head list;
@@ -106,13 +229,22 @@ struct kvmppc_uvmem_slot {
unsigned long base_pfn;
unsigned long *pfns;
};
-
struct kvmppc_uvmem_page_pvt {
struct kvm *kvm;
unsigned long gpa;
bool skip_page_out;
+ bool remove_gfn;
};
+bool kvmppc_uvmem_available(void)
+{
+ /*
+ * If kvmppc_uvmem_bitmap != NULL, then there is an ultravisor
+ * and our data structures have been initialized successfully.
+ */
+ return !!kvmppc_uvmem_bitmap;
+}
+
int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
struct kvmppc_uvmem_slot *p;
@@ -120,7 +252,7 @@ int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
- p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
+ p->pfns = vcalloc(slot->npages, sizeof(*p->pfns));
if (!p->pfns) {
kfree(p);
return -ENOMEM;
@@ -154,8 +286,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
mutex_unlock(&kvm->arch.uvmem_lock);
}
-static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
- struct kvm *kvm)
+static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
+ unsigned long flag, unsigned long uvmem_pfn)
{
struct kvmppc_uvmem_slot *p;
@@ -163,24 +295,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
unsigned long index = gfn - p->base_pfn;
- p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
+ if (flag == KVMPPC_GFN_UVMEM_PFN)
+ p->pfns[index] = uvmem_pfn | flag;
+ else
+ p->pfns[index] = flag;
return;
}
}
}
-static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
+/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */
+static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
+ unsigned long uvmem_pfn, struct kvm *kvm)
{
- struct kvmppc_uvmem_slot *p;
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
+}
- list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
- if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
- p->pfns[gfn - p->base_pfn] = 0;
- return;
- }
- }
+/* mark the GFN as secure-GFN associated with a memory-PFN. */
+static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
+}
+
+/* mark the GFN as a shared GFN. */
+static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
+}
+
+/* mark the GFN as a non-existent GFN. */
+static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, 0, 0);
}
+/* return true, if the GFN is a secure-GFN backed by a secure-PFN */
static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
unsigned long *uvmem_pfn)
{
@@ -190,10 +339,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
unsigned long index = gfn - p->base_pfn;
- if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
+ if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) {
if (uvmem_pfn)
*uvmem_pfn = p->pfns[index] &
- ~KVMPPC_UVMEM_PFN;
+ KVMPPC_GFN_PFN_MASK;
return true;
} else
return false;
@@ -202,12 +351,125 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
return false;
}
+/*
+ * starting from *gfn search for the next available GFN that is not yet
+ * transitioned to a secure GFN. return the value of that GFN in *gfn. If a
+ * GFN is found, return true, else return false
+ *
+ * Must be called with kvm->arch.uvmem_lock held.
+ */
+static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
+ struct kvm *kvm, unsigned long *gfn)
+{
+ struct kvmppc_uvmem_slot *p = NULL, *iter;
+ bool ret = false;
+ unsigned long i;
+
+ list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list)
+ if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) {
+ p = iter;
+ break;
+ }
+ if (!p)
+ return ret;
+ /*
+ * The code below assumes, one to one correspondence between
+ * kvmppc_uvmem_slot and memslot.
+ */
+ for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) {
+ unsigned long index = i - p->base_pfn;
+
+ if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) {
+ *gfn = i;
+ ret = true;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int kvmppc_memslot_page_merge(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot, bool merge)
+{
+ unsigned long gfn = memslot->base_gfn;
+ unsigned long end, start = gfn_to_hva(kvm, gfn);
+ unsigned long vm_flags;
+ int ret = 0;
+ struct vm_area_struct *vma;
+ int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
+
+ if (kvm_is_error_hva(start))
+ return H_STATE;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+
+ mmap_write_lock(kvm->mm);
+ do {
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma) {
+ ret = H_STATE;
+ break;
+ }
+ vma_start_write(vma);
+ /* Copy vm_flags to avoid partial modifications in ksm_madvise */
+ vm_flags = vma->vm_flags;
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ merge_flag, &vm_flags);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+ vm_flags_reset(vma, vm_flags);
+ start = vma->vm_end;
+ } while (end > vma->vm_end);
+
+ mmap_write_unlock(kvm->mm);
+ return ret;
+}
+
+static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+ kvmppc_uvmem_slot_free(kvm, memslot);
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+}
+
+static int __kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ int ret = H_PARAMETER;
+
+ if (kvmppc_memslot_page_merge(kvm, memslot, false))
+ return ret;
+
+ if (kvmppc_uvmem_slot_init(kvm, memslot))
+ goto out1;
+
+ ret = uv_register_mem_slot(kvm->arch.lpid,
+ memslot->base_gfn << PAGE_SHIFT,
+ memslot->npages * PAGE_SIZE,
+ 0, memslot->id);
+ if (ret < 0) {
+ ret = H_PARAMETER;
+ goto out;
+ }
+ return 0;
+out:
+ kvmppc_uvmem_slot_free(kvm, memslot);
+out1:
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+ return ret;
+}
+
unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
{
struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
+ struct kvm_memory_slot *memslot, *m;
int ret = H_SUCCESS;
- int srcu_idx;
+ int srcu_idx, bkt;
+
+ kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START;
if (!kvmppc_uvmem_bitmap)
return H_UNSUPPORTED;
@@ -216,37 +478,125 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
if (!kvm_is_radix(kvm))
return H_UNSUPPORTED;
+ /* NAK the transition to secure if not enabled */
+ if (!kvm->arch.svm_enabled)
+ return H_AUTHORITY;
+
srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ /* register the memslot */
slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots) {
- if (kvmppc_uvmem_slot_init(kvm, memslot)) {
- ret = H_PARAMETER;
- goto out;
- }
- ret = uv_register_mem_slot(kvm->arch.lpid,
- memslot->base_gfn << PAGE_SHIFT,
- memslot->npages * PAGE_SIZE,
- 0, memslot->id);
- if (ret < 0) {
- kvmppc_uvmem_slot_free(kvm, memslot);
- ret = H_PARAMETER;
- goto out;
+ kvm_for_each_memslot(memslot, bkt, slots) {
+ ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(m, bkt, slots) {
+ if (m == memslot)
+ break;
+ __kvmppc_uvmem_memslot_delete(kvm, memslot);
}
}
- kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
-out:
+
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
-unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+/*
+ * Provision a new page on HV side and copy over the contents
+ * from secure memory using UV_PAGE_OUT uvcall.
+ * Caller must held kvm->arch.uvmem_lock.
+ */
+static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa, struct page *fault_page)
{
- if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
- return H_UNSUPPORTED;
+ unsigned long src_pfn, dst_pfn = 0;
+ struct migrate_vma mig = { 0 };
+ struct page *dpage, *spage;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn;
+ int ret = U_SUCCESS;
- kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
- pr_info("LPID %d went secure\n", kvm->arch.lpid);
- return H_SUCCESS;
+ memset(&mig, 0, sizeof(mig));
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+ mig.pgmap_owner = &kvmppc_uvmem_pgmap;
+ mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+ mig.fault_page = fault_page;
+
+ /* The requested page is already paged-out, nothing to do */
+ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
+ return ret;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ return -1;
+
+ spage = migrate_pfn_to_page(*mig.src);
+ if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
+ goto out_finalize;
+
+ if (!is_zone_device_page(spage))
+ goto out_finalize;
+
+ dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ lock_page(dpage);
+ pvt = spage->zone_device_data;
+ pfn = page_to_pfn(dpage);
+
+ /*
+ * This function is used in two cases:
+ * - When HV touches a secure page, for which we do UV_PAGE_OUT
+ * - When a secure page is converted to shared page, we *get*
+ * the page to essentially unmap the device page. In this
+ * case we skip page-out.
+ */
+ if (!pvt->skip_page_out)
+ ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+
+ if (ret == U_SUCCESS)
+ *mig.dst = migrate_pfn(pfn);
+ else {
+ unlock_page(dpage);
+ __free_page(dpage);
+ goto out_finalize;
+ }
+
+ migrate_vma_pages(&mig);
+
+out_finalize:
+ migrate_vma_finalize(&mig);
+ return ret;
+}
+
+static inline int kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa,
+ struct page *fault_page)
+{
+ int ret;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa,
+ fault_page);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ return ret;
}
/*
@@ -257,33 +607,81 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
* fault on them, do fault time migration to replace the device PTEs in
* QEMU page table with normal PTEs from newly allocated pages.
*/
-void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
- struct kvm *kvm)
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
+ struct kvm *kvm, bool skip_page_out)
{
int i;
struct kvmppc_uvmem_page_pvt *pvt;
- unsigned long pfn, uvmem_pfn;
- unsigned long gfn = free->base_gfn;
+ struct page *uvmem_page;
+ struct vm_area_struct *vma = NULL;
+ unsigned long uvmem_pfn, gfn;
+ unsigned long addr;
+
+ mmap_read_lock(kvm->mm);
- for (i = free->npages; i; --i, ++gfn) {
- struct page *uvmem_page;
+ addr = slot->userspace_addr;
+
+ gfn = slot->base_gfn;
+ for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) {
+
+ /* Fetch the VMA if addr is not in the latest fetched one */
+ if (!vma || addr >= vma->vm_end) {
+ vma = vma_lookup(kvm->mm, addr);
+ if (!vma) {
+ pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
+ break;
+ }
+ }
mutex_lock(&kvm->arch.uvmem_lock);
- if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
- mutex_unlock(&kvm->arch.uvmem_lock);
- continue;
+
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = skip_page_out;
+ pvt->remove_gfn = true;
+
+ if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE,
+ PAGE_SHIFT, kvm, pvt->gpa, NULL))
+ pr_err("Can't page out gpa:0x%lx addr:0x%lx\n",
+ pvt->gpa, addr);
+ } else {
+ /* Remove the shared flag if any */
+ kvmppc_gfn_remove(gfn, kvm);
}
- uvmem_page = pfn_to_page(uvmem_pfn);
- pvt = uvmem_page->zone_device_data;
- pvt->skip_page_out = true;
mutex_unlock(&kvm->arch.uvmem_lock);
-
- pfn = gfn_to_pfn(kvm, gfn);
- if (is_error_noslot_pfn(pfn))
- continue;
- kvm_release_pfn_clean(pfn);
}
+
+ mmap_read_unlock(kvm->mm);
+}
+
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+ int srcu_idx, bkt;
+ struct kvm_memory_slot *memslot;
+
+ /*
+ * Expect to be called only after INIT_START and before INIT_DONE.
+ * If INIT_DONE was completed, use normal VM termination sequence.
+ */
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return H_STATE;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
+ kvmppc_uvmem_drop_pages(memslot, kvm, false);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ kvm->arch.secure_guest = 0;
+ uv_svm_terminate(kvm->arch.lpid);
+
+ return H_PARAMETER;
}
/*
@@ -301,9 +699,9 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
struct kvmppc_uvmem_page_pvt *pvt;
unsigned long pfn_last, pfn_first;
- pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
+ pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
pfn_last = pfn_first +
- (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);
+ (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
spin_lock(&kvmppc_uvmem_bitmap_lock);
bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
@@ -318,15 +716,14 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
goto out_clear;
uvmem_pfn = bit + pfn_first;
- kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+ kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
pvt->gpa = gpa;
pvt->kvm = kvm;
dpage = pfn_to_page(uvmem_pfn);
dpage->zone_device_data = pvt;
- get_page(dpage);
- lock_page(dpage);
+ zone_device_page_init(dpage);
return dpage;
out_clear:
spin_lock(&kvmppc_uvmem_bitmap_lock);
@@ -337,16 +734,17 @@ out:
}
/*
- * Alloc a PFN from private device memory pool and copy page from normal
- * memory to secure memory using UV_PAGE_IN uvcall.
+ * Alloc a PFN from private device memory pool. If @pagein is true,
+ * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
*/
-static int
-kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, unsigned long gpa, struct kvm *kvm,
- unsigned long page_shift, bool *downgrade)
+static int kvmppc_svm_page_in(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long gpa, struct kvm *kvm,
+ unsigned long page_shift,
+ bool pagein)
{
unsigned long src_pfn, dst_pfn = 0;
- struct migrate_vma mig;
+ struct migrate_vma mig = { 0 };
struct page *spage;
unsigned long pfn;
struct page *dpage;
@@ -358,18 +756,7 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
mig.end = end;
mig.src = &src_pfn;
mig.dst = &dst_pfn;
-
- /*
- * We come here with mmap_sem write lock held just for
- * ksm_madvise(), otherwise we only need read mmap_sem.
- * Hence downgrade to read lock once ksm_madvise() is done.
- */
- ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags);
- downgrade_write(&kvm->mm->mmap_sem);
- *downgrade = true;
- if (ret)
- return ret;
+ mig.flags = MIGRATE_VMA_SELECT_SYSTEM;
ret = migrate_vma_setup(&mig);
if (ret)
@@ -386,19 +773,98 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
goto out_finalize;
}
- pfn = *mig.src >> MIGRATE_PFN_SHIFT;
- spage = migrate_pfn_to_page(*mig.src);
- if (spage)
- uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
- page_shift);
+ if (pagein) {
+ pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+ spage = migrate_pfn_to_page(*mig.src);
+ if (spage) {
+ ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+ if (ret)
+ goto out_finalize;
+ }
+ }
- *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+ *mig.dst = migrate_pfn(page_to_pfn(dpage));
migrate_vma_pages(&mig);
out_finalize:
migrate_vma_finalize(&mig);
return ret;
}
+static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ unsigned long gfn = memslot->base_gfn;
+ struct vm_area_struct *vma;
+ unsigned long start, end;
+ int ret = 0;
+
+ mmap_read_lock(kvm->mm);
+ mutex_lock(&kvm->arch.uvmem_lock);
+ while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
+ ret = H_STATE;
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ break;
+
+ end = start + (1UL << PAGE_SHIFT);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ break;
+
+ ret = kvmppc_svm_page_in(vma, start, end,
+ (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+
+ /* relinquish the cpu if needed */
+ cond_resched();
+ }
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ mmap_read_unlock(kvm->mm);
+ return ret;
+}
+
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx, bkt;
+ long ret = H_SUCCESS;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ /* migrate any unmoved normal pfn to device pfns*/
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, bkt, slots) {
+ ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
+ if (ret) {
+ /*
+ * The pages will remain transitioned.
+ * Its the callers responsibility to
+ * terminate the VM, which will undo
+ * all state of the VM. Till then
+ * this VM is in a erroneous state.
+ * Its KVMPPC_SECURE_INIT_DONE will
+ * remain unset.
+ */
+ ret = H_STATE;
+ goto out;
+ }
+ }
+
+ kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
+ pr_info("LPID %lld went secure\n", kvm->arch.lpid);
+
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
/*
* Shares the page with HV, thus making it a normal page.
*
@@ -408,8 +874,8 @@ out_finalize:
* In the former case, uses dev_pagemap_ops.migrate_to_ram handler
* to unmap the device page from QEMU's page tables.
*/
-static unsigned long
-kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
+ unsigned long page_shift)
{
int ret = H_PARAMETER;
@@ -426,6 +892,11 @@ kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
uvmem_page = pfn_to_page(uvmem_pfn);
pvt = uvmem_page->zone_device_data;
pvt->skip_page_out = true;
+ /*
+ * do not drop the GFN. It is a valid GFN
+ * that is transitioned to a shared GFN.
+ */
+ pvt->remove_gfn = false;
}
retry:
@@ -439,12 +910,16 @@ retry:
uvmem_page = pfn_to_page(uvmem_pfn);
pvt = uvmem_page->zone_device_data;
pvt->skip_page_out = true;
+ pvt->remove_gfn = false; /* it continues to be a valid GFN */
kvm_release_pfn_clean(pfn);
goto retry;
}
- if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
+ if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+ page_shift)) {
+ kvmppc_gfn_shared(gfn, kvm);
ret = H_SUCCESS;
+ }
kvm_release_pfn_clean(pfn);
mutex_unlock(&kvm->arch.uvmem_lock);
out:
@@ -458,11 +933,10 @@ out:
* H_PAGE_IN_SHARED flag makes the page shared which means that the same
* memory in is visible from both UV and HV.
*/
-unsigned long
-kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
- unsigned long flags, unsigned long page_shift)
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags,
+ unsigned long page_shift)
{
- bool downgrade = false;
unsigned long start, end;
struct vm_area_struct *vma;
int srcu_idx;
@@ -483,7 +957,7 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
ret = H_PARAMETER;
srcu_idx = srcu_read_lock(&kvm->srcu);
- down_write(&kvm->mm->mmap_sem);
+ mmap_read_lock(kvm->mm);
start = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(start))
@@ -499,95 +973,20 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
if (!vma || vma->vm_start > start || vma->vm_end < end)
goto out_unlock;
- if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
- &downgrade))
- ret = H_SUCCESS;
+ if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
+ true))
+ goto out_unlock;
+
+ ret = H_SUCCESS;
+
out_unlock:
mutex_unlock(&kvm->arch.uvmem_lock);
out:
- if (downgrade)
- up_read(&kvm->mm->mmap_sem);
- else
- up_write(&kvm->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
-/*
- * Provision a new page on HV side and copy over the contents
- * from secure memory using UV_PAGE_OUT uvcall.
- */
-static int
-kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, unsigned long page_shift,
- struct kvm *kvm, unsigned long gpa)
-{
- unsigned long src_pfn, dst_pfn = 0;
- struct migrate_vma mig;
- struct page *dpage, *spage;
- struct kvmppc_uvmem_page_pvt *pvt;
- unsigned long pfn;
- int ret = U_SUCCESS;
-
- memset(&mig, 0, sizeof(mig));
- mig.vma = vma;
- mig.start = start;
- mig.end = end;
- mig.src = &src_pfn;
- mig.dst = &dst_pfn;
-
- mutex_lock(&kvm->arch.uvmem_lock);
- /* The requested page is already paged-out, nothing to do */
- if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
- goto out;
-
- ret = migrate_vma_setup(&mig);
- if (ret)
- return ret;
-
- spage = migrate_pfn_to_page(*mig.src);
- if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
- goto out_finalize;
-
- if (!is_zone_device_page(spage))
- goto out_finalize;
-
- dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
- if (!dpage) {
- ret = -1;
- goto out_finalize;
- }
-
- lock_page(dpage);
- pvt = spage->zone_device_data;
- pfn = page_to_pfn(dpage);
-
- /*
- * This function is used in two cases:
- * - When HV touches a secure page, for which we do UV_PAGE_OUT
- * - When a secure page is converted to shared page, we *get*
- * the page to essentially unmap the device page. In this
- * case we skip page-out.
- */
- if (!pvt->skip_page_out)
- ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
- gpa, 0, page_shift);
-
- if (ret == U_SUCCESS)
- *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
- else {
- unlock_page(dpage);
- __free_page(dpage);
- goto out_finalize;
- }
-
- migrate_vma_pages(&mig);
-out_finalize:
- migrate_vma_finalize(&mig);
-out:
- mutex_unlock(&kvm->arch.uvmem_lock);
- return ret;
-}
/*
* Fault handler callback that gets called when HV touches any page that
@@ -603,7 +1002,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
if (kvmppc_svm_page_out(vmf->vma, vmf->address,
vmf->address + PAGE_SIZE, PAGE_SHIFT,
- pvt->kvm, pvt->gpa))
+ pvt->kvm, pvt->gpa, vmf->page))
return VM_FAULT_SIGBUS;
else
return 0;
@@ -612,13 +1011,14 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
/*
* Release the device PFN back to the pool
*
- * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT.
+ * Gets called when secure GFN tranistions from a secure-PFN
+ * to a normal PFN during H_SVM_PAGE_OUT.
* Gets called with kvm->arch.uvmem_lock held.
*/
static void kvmppc_uvmem_page_free(struct page *page)
{
unsigned long pfn = page_to_pfn(page) -
- (kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
+ (kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT);
struct kvmppc_uvmem_page_pvt *pvt;
spin_lock(&kvmppc_uvmem_bitmap_lock);
@@ -627,7 +1027,10 @@ static void kvmppc_uvmem_page_free(struct page *page)
pvt = page->zone_device_data;
page->zone_device_data = NULL;
- kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ if (pvt->remove_gfn)
+ kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ else
+ kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
kfree(pvt);
}
@@ -660,7 +1063,7 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
ret = H_PARAMETER;
srcu_idx = srcu_read_lock(&kvm->srcu);
- down_read(&kvm->mm->mmap_sem);
+ mmap_read_lock(kvm->mm);
start = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(start))
goto out;
@@ -670,10 +1073,10 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
if (!vma || vma->vm_start > start || vma->vm_end < end)
goto out;
- if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
+ if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, NULL))
ret = H_SUCCESS;
out:
- up_read(&kvm->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
@@ -699,6 +1102,21 @@ out:
return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
}
+int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new)
+{
+ int ret = __kvmppc_uvmem_memslot_create(kvm, new);
+
+ if (!ret)
+ ret = kvmppc_uv_migrate_mem_slot(kvm, new);
+
+ return ret;
+}
+
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old)
+{
+ __kvmppc_uvmem_memslot_delete(kvm, old);
+}
+
static u64 kvmppc_get_secmem_size(void)
{
struct device_node *np;
@@ -706,6 +1124,20 @@ static u64 kvmppc_get_secmem_size(void)
const __be32 *prop;
u64 size = 0;
+ /*
+ * First try the new ibm,secure-memory nodes which supersede the
+ * secure-memory-ranges property.
+ * If we found some, no need to read the deprecated ones.
+ */
+ for_each_compatible_node(np, NULL, "ibm,secure-memory") {
+ prop = of_get_property(np, "reg", &len);
+ if (!prop)
+ continue;
+ size += of_read_number(prop + 2, 2);
+ }
+ if (size)
+ return size;
+
np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
if (!np)
goto out;
@@ -749,8 +1181,12 @@ int kvmppc_uvmem_init(void)
}
kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
- kvmppc_uvmem_pgmap.res = *res;
+ kvmppc_uvmem_pgmap.range.start = res->start;
+ kvmppc_uvmem_pgmap.range.end = res->end;
+ kvmppc_uvmem_pgmap.nr_range = 1;
kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
+ /* just one global instance: */
+ kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
if (IS_ERR(addr)) {
ret = PTR_ERR(addr);
@@ -759,8 +1195,7 @@ int kvmppc_uvmem_init(void)
pfn_first = res->start >> PAGE_SHIFT;
pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
- kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
- sizeof(unsigned long), GFP_KERNEL);
+ kvmppc_uvmem_bitmap = bitmap_zalloc(pfn_last - pfn_first, GFP_KERNEL);
if (!kvmppc_uvmem_bitmap) {
ret = -ENOMEM;
goto out_unmap;
@@ -778,8 +1213,11 @@ out:
void kvmppc_uvmem_free(void)
{
+ if (!kvmppc_uvmem_bitmap)
+ return;
+
memunmap_pages(&kvmppc_uvmem_pgmap);
- release_mem_region(kvmppc_uvmem_pgmap.res.start,
- resource_size(&kvmppc_uvmem_pgmap.res));
- kfree(kvmppc_uvmem_bitmap);
+ release_mem_region(kvmppc_uvmem_pgmap.range.start,
+ range_len(&kvmppc_uvmem_pgmap.range));
+ bitmap_free(kvmppc_uvmem_bitmap);
}