summary refs log tree commit diff
path: root/arch/s390/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r-- arch/s390/kvm/Makefile 2
-rw-r--r-- arch/s390/kvm/gaccess.c 44
-rw-r--r-- arch/s390/kvm/gmap-vsie.c 142
-rw-r--r-- arch/s390/kvm/gmap.c 212
-rw-r--r-- arch/s390/kvm/gmap.h 39
-rw-r--r-- arch/s390/kvm/intercept.c 7
-rw-r--r-- arch/s390/kvm/interrupt.c 19
-rw-r--r-- arch/s390/kvm/kvm-s390.c 237
-rw-r--r-- arch/s390/kvm/kvm-s390.h 19
-rw-r--r-- arch/s390/kvm/pv.c 21
-rw-r--r-- arch/s390/kvm/vsie.c 106
11 files changed, 757 insertions, 91 deletions
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 02217fb4ae10..f0ffe874adc2 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9816b0060fbe..f6fded15633a 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -16,6 +16,7 @@
#include <asm/gmap.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
+#include "gmap.h"
#include "gaccess.h"
/*
@@ -1393,6 +1394,44 @@ shadow_pgt:
}
/**
+ * shadow_pgt_lookup() - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow guest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
+ int *dat_protection, int *fake)
+{
+ unsigned long pt_index;
+ unsigned long *table;
+ struct page *page;
+ int rc;
+
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+ /* Shadow page tables are full pages (pte+pgste) */
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
+ *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
+ *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+ *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
+ rc = 0;
+ } else {
+ rc = -EAGAIN;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+}
+
+/**
* kvm_s390_shadow_fault - handle fault on a shadow page table
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
@@ -1415,6 +1454,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
int dat_protection, fake;
int rc;
+ if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
+ return -EFAULT;
+
mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
@@ -1423,7 +1465,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
*/
ipte_lock(vcpu->kvm);
- rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
&fake);
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
new file mode 100644
index 000000000000..a6d1dbb04c97
--- /dev/null
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390 nested VMs.
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "kvm-s390.h"
+#include "gmap.h"
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is just being created,
+ * otherwise NULL
+ *
+ * Context: Called with parent->shadow_lock held
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg;
+
+ lockdep_assert_held(&parent->shadow_lock);
+ list_for_each_entry(sg, &parent->children, list) {
+ if (!gmap_shadow_valid(sg, asce, edat_level))
+ continue;
+ if (!sg->initialized)
+ return ERR_PTR(-EAGAIN);
+ refcount_inc(&sg->ref_count);
+ return sg;
+ }
+ return NULL;
+}
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred to by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg, *new;
+ unsigned long limit;
+ int rc;
+
+ if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
+ KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
+ return ERR_PTR(-EFAULT);
+ spin_lock(&parent->shadow_lock);
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ spin_unlock(&parent->shadow_lock);
+ if (sg)
+ return sg;
+ /* Create a new shadow gmap */
+ limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+ if (asce & _ASCE_REAL_SPACE)
+ limit = -1UL;
+ new = gmap_alloc(limit);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ new->mm = parent->mm;
+ new->parent = gmap_get(parent);
+ new->private = parent->private;
+ new->orig_asce = asce;
+ new->edat_level = edat_level;
+ new->initialized = false;
+ spin_lock(&parent->shadow_lock);
+ /* Recheck if another CPU created the same shadow */
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ if (sg) {
+ spin_unlock(&parent->shadow_lock);
+ gmap_free(new);
+ return sg;
+ }
+ if (asce & _ASCE_REAL_SPACE) {
+ /* only allow one real-space gmap shadow */
+ list_for_each_entry(sg, &parent->children, list) {
+ if (sg->orig_asce & _ASCE_REAL_SPACE) {
+ spin_lock(&sg->guest_table_lock);
+ gmap_unshadow(sg);
+ spin_unlock(&sg->guest_table_lock);
+ list_del(&sg->list);
+ gmap_put(sg);
+ break;
+ }
+ }
+ }
+ refcount_set(&new->ref_count, 2);
+ list_add(&new->list, &parent->children);
+ if (asce & _ASCE_REAL_SPACE) {
+ /* nothing to protect, return right away */
+ new->initialized = true;
+ spin_unlock(&parent->shadow_lock);
+ return new;
+ }
+ spin_unlock(&parent->shadow_lock);
+ /* protect after insertion, so it will get properly invalidated */
+ mmap_read_lock(parent->mm);
+ rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
+ ((asce & _ASCE_TABLE_LENGTH) + 1),
+ PROT_READ, GMAP_NOTIFY_SHADOW);
+ mmap_read_unlock(parent->mm);
+ spin_lock(&parent->shadow_lock);
+ new->initialized = true;
+ if (rc) {
+ list_del(&new->list);
+ gmap_free(new);
+ new = ERR_PTR(rc);
+ }
+ spin_unlock(&parent->shadow_lock);
+ return new;
+}
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
new file mode 100644
index 000000000000..02adf151d4de
--- /dev/null
+++ b/arch/s390/kvm/gmap.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "gmap.h"
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * Also, no export is needed when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
+static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
+ */
+ if (folio_test_hugetlb(folio))
+ return -EFAULT;
+ if (folio_test_large(folio)) {
+ mmap_read_unlock(gmap->mm);
+ rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true);
+ mmap_read_lock(gmap->mm);
+ if (rc)
+ return rc;
+ folio = page_folio(page);
+ }
+
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, gmap->mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ /*
+ * In theory a race is possible and the folio might have become
+ * large again before the folio_trylock() above. In that case, no
+ * action is performed and -EAGAIN is returned; the callers will
+ * have to try again later.
+ * In most cases this implies running the VM again, getting the same
+ * exception again, and making another attempt in this function.
+ * This is expected to happen extremely rarely.
+ */
+ if (rc == -E2BIG)
+ return -EAGAIN;
+ /* The folio has too many references, try to shake some off */
+ if (rc == -EBUSY) {
+ mmap_read_unlock(gmap->mm);
+ kvm_s390_wiggle_split_folio(gmap->mm, folio, false);
+ mmap_read_lock(gmap->mm);
+ return -EAGAIN;
+ }
+
+ return rc;
+}
+
+/**
+ * gmap_make_secure() - make one guest page secure
+ * @gmap: the guest gmap
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()).
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+ struct kvm *kvm = gmap->private;
+ struct page *page;
+ int rc = 0;
+
+ lockdep_assert_held(&kvm->srcu);
+
+ page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+ mmap_read_lock(gmap->mm);
+ if (page)
+ rc = __gmap_make_secure(gmap, page, uvcb);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(gmap->mm);
+
+ return rc;
+}
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = gmap->guest_handle,
+ .gaddr = gaddr,
+ };
+
+ return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+
+/**
+ * __gmap_destroy_page() - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /*
+ * See gmap_make_secure(): large folios cannot be secure. Small
+ * folio implies FW_LEVEL_PTE.
+ */
+ if (folio_test_large(folio))
+ return -EFAULT;
+
+ rc = uv_destroy_folio(folio);
+ /*
+ * Fault handlers can race; it is possible that two CPUs will fault
+ * on the same secure page. One CPU can destroy the page, reboot,
+ * re-enter secure mode and import it, while the second CPU was
+ * stuck at the beginning of the handler. At some point the second
+ * CPU will be able to progress, and it will not be able to destroy
+ * the page. In that case we do not want to terminate the process,
+ * we instead try to export the page.
+ */
+ if (rc)
+ rc = uv_convert_from_secure_folio(folio);
+
+ return rc;
+}
+
+/**
+ * gmap_destroy_page() - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+{
+ struct page *page;
+ int rc = 0;
+
+ mmap_read_lock(gmap->mm);
+ page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
+ if (page)
+ rc = __gmap_destroy_page(gmap, page);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(gmap->mm);
+ return rc;
+}
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
new file mode 100644
index 000000000000..c8f031c9ea5f
--- /dev/null
+++ b/arch/s390/kvm/gmap.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016, 2025
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+
+#ifndef ARCH_KVM_S390_GMAP_H
+#define ARCH_KVM_S390_GMAP_H
+
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
+
+/**
+ * gmap_shadow_valid - check if a shadow guest address space matches the
+ * given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise, the
+ * caller has to request a new shadow gmap in this case.
+ *
+ */
+static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+ if (sg->removed)
+ return 0;
+ return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
+#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 5bbaadf75dc6..610dd44a948b 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -21,6 +21,7 @@
#include "gaccess.h"
#include "trace.h"
#include "trace-s390.h"
+#include "gmap.h"
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
@@ -367,7 +368,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg2, &srcaddr, GACC_FETCH, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0);
+ rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
if (rc != 0)
return rc;
@@ -376,7 +377,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg1, &dstaddr, GACC_STORE, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE);
+ rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
if (rc != 0)
return rc;
@@ -549,7 +550,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
* If the unpin did not succeed, the guest will exit again for the UVC
* and we will retry the unpin.
*/
- if (rc == -EINVAL)
+ if (rc == -EINVAL || rc == -ENXIO)
return 0;
/*
* If we got -EAGAIN here, we simply return it. It will eventually
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index d4f031e086fc..07ff0e10cb7f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2893,7 +2893,8 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- u64 uaddr;
+ u64 uaddr_s, uaddr_i;
+ int idx;
switch (ue->type) {
/* we store the userspace addresses instead of the guest addresses */
@@ -2901,14 +2902,16 @@ int kvm_set_routing_entry(struct kvm *kvm,
if (kvm_is_ucontrol(kvm))
return -EINVAL;
e->set = set_adapter_int;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
- if (uaddr == -EFAULT)
- return -EFAULT;
- e->adapter.summary_addr = uaddr;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
- if (uaddr == -EFAULT)
+
+ idx = srcu_read_lock(&kvm->srcu);
+ uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
+ uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i))
return -EFAULT;
- e->adapter.ind_addr = uaddr;
+ e->adapter.summary_addr = uaddr_s;
+ e->adapter.ind_addr = uaddr_i;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d8080c27d45b..ebecb96bacce 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -50,6 +50,7 @@
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"
+#include "gmap.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -3428,8 +3429,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
+ struct kvm_userspace_memory_region2 fake_memslot = {
+ .slot = KVM_S390_UCONTROL_MEMSLOT,
+ .guest_phys_addr = 0,
+ .userspace_addr = 0,
+ .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
+ .flags = 0,
+ };
+
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+ /* one flat fake memslot covering the whole address-space */
+ mutex_lock(&kvm->slots_lock);
+ KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
+ mutex_unlock(&kvm->slots_lock);
} else {
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_SIZE_MAX;
@@ -4498,6 +4511,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
+static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm *kvm = gmap->private;
+ gfn_t gfn = gpa_to_gfn(gaddr);
+ bool unlocked;
+ hva_t vmaddr;
+ gpa_t tmp;
+ int rc;
+
+ if (kvm_is_ucontrol(kvm)) {
+ tmp = __gmap_translate(gmap, gaddr);
+ gfn = gpa_to_gfn(tmp);
+ }
+
+ vmaddr = gfn_to_hva(kvm, gfn);
+ rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!rc)
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+ return rc;
+}
+
+/**
+ * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
+ * @gmap: the gmap of the guest
+ * @gpa: the starting guest address
+ * @npages: how many pages to protect
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
+ *
+ * Context: kvm->srcu and gmap->mm need to be held in read mode
+ */
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits)
+{
+ unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+ gpa_t end = gpa + npages * PAGE_SIZE;
+ int rc;
+
+ for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ if (rc == -EAGAIN) {
+ __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
+{
+ gpa_t gaddr = kvm_s390_get_prefix(vcpu);
+ int idx, rc;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ mmap_read_lock(vcpu->arch.gmap->mm);
+
+ rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
+
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return rc;
+}
+
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
@@ -4513,9 +4595,8 @@ retry:
*/
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
int rc;
- rc = gmap_mprotect_notify(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu),
- PAGE_SIZE * 2, PROT_WRITE);
+
+ rc = kvm_s390_mprotect_notify_prefix(vcpu);
if (rc) {
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
return rc;
@@ -4766,11 +4847,111 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
+static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
+{
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+}
+
+/*
+ * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
+ * @vcpu: the vCPU whose gmap is to be fixed up
+ * @gfn: the guest frame number used for memslots (including fake memslots)
+ * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
+ * @flags: FOLL_* flags
+ *
+ * Return: 0 on success, < 0 in case of error.
+ * Context: The mm lock must not be held before calling. May sleep.
+ */
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm_memory_slot *slot;
+ unsigned int fault_flags;
+ bool writable, unlocked;
+ unsigned long vmaddr;
+ struct page *page;
+ kvm_pfn_t pfn;
+ int rc;
+
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ return vcpu_post_run_addressing_exception(vcpu);
+
+ fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+ if (vcpu->arch.gmap->pfault_enabled)
+ flags |= FOLL_NOWAIT;
+ vmaddr = __gfn_to_hva_memslot(slot, gfn);
+
+try_again:
+ pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+
+ /* Access outside memory, inject addressing exception */
+ if (is_noslot_pfn(pfn))
+ return vcpu_post_run_addressing_exception(vcpu);
+ /* Signal pending: try again */
+ if (pfn == KVM_PFN_ERR_SIGPENDING)
+ return -EAGAIN;
+
+ /* Needs I/O, try to set up async pfault (only possible with FOLL_NOWAIT) */
+ if (pfn == KVM_PFN_ERR_NEEDS_IO) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ vcpu->stat.pfault_sync++;
+ /* Could not set up async pfault, try again synchronously */
+ flags &= ~FOLL_NOWAIT;
+ goto try_again;
+ }
+ /* Any other error */
+ if (is_error_pfn(pfn))
+ return -EFAULT;
+
+ /* Success */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
+ rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
+ if (!rc)
+ rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
+ scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
+ kvm_release_faultin_page(vcpu->kvm, page, false, writable);
+ }
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ return rc;
+}
+
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+{
+ unsigned long gaddr_tmp;
+ gfn_t gfn;
+
+ gfn = gpa_to_gfn(gaddr);
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ /*
+ * This translates the per-vCPU guest address into a
+ * fake guest address, which can then be used with the
+ * fake memslots that are identity mapping userspace.
+ * This allows ucontrol VMs to use the normal fault
+ * resolution path, like normal VMs.
+ */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ if (gaddr_tmp == -EFAULT) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+ vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
+ return -EREMOTE;
+ }
+ gfn = gpa_to_gfn(gaddr_tmp);
+ }
+ return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
+}
+
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
{
unsigned int flags = 0;
unsigned long gaddr;
- int rc = 0;
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
if (kvm_s390_cur_gmap_fault_is_write())
@@ -4781,9 +4962,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
vcpu->stat.exit_null++;
break;
case PGM_NON_SECURE_STORAGE_ACCESS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ kvm_s390_assert_primary_as(vcpu);
/*
* This is normal operation; a page belonging to a protected
* guest has not been imported yet. Try to import the page into
@@ -4794,9 +4973,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
break;
case PGM_SECURE_STORAGE_ACCESS:
case PGM_SECURE_STORAGE_VIOLATION:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ kvm_s390_assert_primary_as(vcpu);
/*
* This can happen after a reboot with asynchronous teardown;
* the new guest (normal or protected) will run on top of the
@@ -4825,40 +5002,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case PGM_REGION_FIRST_TRANS:
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
- if (vcpu->arch.gmap->pfault_enabled) {
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
- if (rc == -EFAULT)
- return vcpu_post_run_addressing_exception(vcpu);
- if (rc == -EAGAIN) {
- trace_kvm_s390_major_guest_pfault(vcpu);
- if (kvm_arch_setup_async_pf(vcpu))
- return 0;
- vcpu->stat.pfault_sync++;
- } else {
- return rc;
- }
- }
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
- if (rc == -EFAULT) {
- if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
- vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
- vcpu->run->s390_ucontrol.pgm_code = 0x10;
- return -EREMOTE;
- }
- return vcpu_post_run_addressing_exception(vcpu);
- }
- break;
+ kvm_s390_assert_primary_as(vcpu);
+ return vcpu_dat_fault_handler(vcpu, gaddr, flags);
default:
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
current->thread.gmap_int_code, current->thread.gmap_teid.val);
send_sig(SIGSEGV, current, 0);
break;
}
- return rc;
+ return 0;
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@@ -5737,7 +5889,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(vcpu->arch.gmap, arg, 0);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = vcpu_dat_fault_handler(vcpu, arg, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_ENABLE_CAP:
@@ -5853,7 +6007,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
gpa_t size;
- if (kvm_is_ucontrol(kvm))
+ if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
return -EINVAL;
/* When we are protected, we should not change the memory slots */
@@ -5905,6 +6059,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int rc = 0;
+ if (kvm_is_ucontrol(kvm))
+ return;
+
switch (change) {
case KVM_MR_DELETE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 597d7a71deeb..8d3bbb2dd8d2 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -20,6 +20,8 @@
#include <asm/processor.h>
#include <asm/sclp.h>
+#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
+
static inline void kvm_s390_fpu_store(struct kvm_run *run)
{
fpu_stfpc(&run->s.regs.fpc);
@@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
return gd;
}
+static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa)
+{
+ hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+ if (!kvm_is_error_hva(hva))
+ hva |= offset_in_page(gpa);
+ return hva;
+}
+
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
@@ -408,6 +419,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits);
+
+static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
+{
+ return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
+}
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 75e81ba26d04..22c012aa5206 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,6 +17,7 @@
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+#include "gmap.h"
bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
@@ -638,10 +639,28 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
.tweak[1] = offset,
};
int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+ unsigned long vmaddr;
+ bool unlocked;
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
+ if (ret == -ENXIO) {
+ mmap_read_lock(kvm->mm);
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(vmaddr)) {
+ ret = -EFAULT;
+ } else {
+ ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!ret)
+ ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
+ }
+ mmap_read_unlock(kvm->mm);
+ if (!ret)
+ return -EAGAIN;
+ return ret;
+ }
+
if (ret && ret != -EAGAIN)
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
uvcb.gaddr, *rc, *rrc);
@@ -660,6 +679,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
addr, size);
+ guard(srcu)(&kvm->srcu);
+
while (offset < size) {
ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
if (ret == -EAGAIN) {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a687695d8f68..a78df3a4f353 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -13,6 +13,7 @@
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/io.h>
+#include <linux/mman.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
@@ -22,6 +23,11 @@
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include "gmap.h"
+
+/* Bit numbers used with the bitops on vsie_page->flags. */
+enum vsie_page_flags {
+ /* Set while the vsie page is held by a user; cleared by put_vsie_page(). */
+ VSIE_PAGE_IN_USE = 0,
+};
struct vsie_page {
struct kvm_s390_sie_block scb_s; /* 0x0000 */
@@ -46,7 +52,18 @@ struct vsie_page {
gpa_t gvrd_gpa; /* 0x0240 */
gpa_t riccbd_gpa; /* 0x0248 */
gpa_t sdnx_gpa; /* 0x0250 */
- __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
+ /*
+ * guest address of the original SCB. Remains set for free vsie
+ * pages, so we can properly look them up in our addr_to_page
+ * radix tree.
+ */
+ gpa_t scb_gpa; /* 0x0258 */
+ /*
+ * Flags: must be set/cleared atomically after the vsie page can be
+ * looked up by other CPUs.
+ */
+ unsigned long flags; /* 0x0260 */
+ __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
@@ -584,7 +601,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
struct kvm *kvm = gmap->private;
struct vsie_page *cur;
unsigned long prefix;
- struct page *page;
int i;
if (!gmap_is_shadow(gmap))
@@ -594,10 +610,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
* therefore we can safely reference them all the time.
*/
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = READ_ONCE(kvm->arch.vsie.pages[i]);
- if (!page)
+ cur = READ_ONCE(kvm->arch.vsie.pages[i]);
+ if (!cur)
continue;
- cur = page_to_virt(page);
if (READ_ONCE(cur->gmap) != gmap)
continue;
prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
@@ -1345,6 +1360,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return rc;
}
+/*
+ * Try getting a given vsie page, returning "true" on success.
+ *
+ * Success means this caller flipped VSIE_PAGE_IN_USE from clear to set in
+ * vsie_page->flags; the page is later released with put_vsie_page().
+ */
+static inline bool try_get_vsie_page(struct vsie_page *vsie_page)
+{
+ /* Already marked in use: fail without attempting the test-and-set. */
+ if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags))
+ return false;
+ /* Only the caller that observes the bit as previously clear wins. */
+ return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
+/*
+ * Put a vsie page acquired through get_vsie_page / try_get_vsie_page.
+ *
+ * Clears VSIE_PAGE_IN_USE in vsie_page->flags so that a later
+ * try_get_vsie_page() on this page can succeed again.
+ */
+static void put_vsie_page(struct vsie_page *vsie_page)
+{
+ clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
/*
* Get or create a vsie page for a scb address.
*
@@ -1355,16 +1384,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
struct vsie_page *vsie_page;
- struct page *page;
int nr_vcpus;
rcu_read_lock();
- page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+ vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
rcu_read_unlock();
- if (page) {
- if (page_ref_inc_return(page) == 2)
- return page_to_virt(page);
- page_ref_dec(page);
+ if (vsie_page) {
+ if (try_get_vsie_page(vsie_page)) {
+ if (vsie_page->scb_gpa == addr)
+ return vsie_page;
+ /*
+ * We raced with someone reusing + putting this vsie
+ * page before we grabbed it.
+ */
+ put_vsie_page(vsie_page);
+ }
}
/*
@@ -1375,36 +1409,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
mutex_lock(&kvm->arch.vsie.mutex);
if (kvm->arch.vsie.page_count < nr_vcpus) {
- page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
- if (!page) {
+ vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
+ if (!vsie_page) {
mutex_unlock(&kvm->arch.vsie.mutex);
return ERR_PTR(-ENOMEM);
}
- page_ref_inc(page);
- kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+ __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+ kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page;
kvm->arch.vsie.page_count++;
} else {
/* reuse an existing entry that belongs to nobody */
while (true) {
- page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
- if (page_ref_inc_return(page) == 2)
+ vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+ if (try_get_vsie_page(vsie_page))
break;
- page_ref_dec(page);
kvm->arch.vsie.next++;
kvm->arch.vsie.next %= nr_vcpus;
}
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
}
- page->index = addr;
- /* double use of the same address */
- if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
- page_ref_dec(page);
+ /* Mark it as invalid until it resides in the tree. */
+ vsie_page->scb_gpa = ULONG_MAX;
+
+ /* Double use of the same address or allocation failure. */
+ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
+ vsie_page)) {
+ put_vsie_page(vsie_page);
mutex_unlock(&kvm->arch.vsie.mutex);
return NULL;
}
+ vsie_page->scb_gpa = addr;
mutex_unlock(&kvm->arch.vsie.mutex);
- vsie_page = page_to_virt(page);
memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
release_gmap_shadow(vsie_page);
vsie_page->fault_addr = 0;
@@ -1412,14 +1450,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
return vsie_page;
}
-/* put a vsie page acquired via get_vsie_page */
-static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
-{
- struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
-
- page_ref_dec(page);
-}
-
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
struct vsie_page *vsie_page;
@@ -1470,7 +1500,7 @@ out_unshadow:
out_unpin_scb:
unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
- put_vsie_page(vcpu->kvm, vsie_page);
+ put_vsie_page(vsie_page);
return rc < 0 ? rc : 0;
}
@@ -1486,18 +1516,18 @@ void kvm_s390_vsie_init(struct kvm *kvm)
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
struct vsie_page *vsie_page;
- struct page *page;
int i;
mutex_lock(&kvm->arch.vsie.mutex);
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = kvm->arch.vsie.pages[i];
+ vsie_page = kvm->arch.vsie.pages[i];
kvm->arch.vsie.pages[i] = NULL;
- vsie_page = page_to_virt(page);
release_gmap_shadow(vsie_page);
/* free the radix tree entry */
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
- __free_page(page);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
+ free_page((unsigned long)vsie_page);
}
kvm->arch.vsie.page_count = 0;
mutex_unlock(&kvm->arch.vsie.mutex);