summaryrefslogtreecommitdiff
path: root/arch/s390/kvm/gaccess.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kvm/gaccess.c')
-rw-r--r--arch/s390/kvm/gaccess.c989
1 files changed, 645 insertions, 344 deletions
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 07d30ffcfa41..21c2e61fece4 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -9,170 +9,16 @@
#include <linux/vmalloc.h>
#include <linux/mm_types.h>
#include <linux/err.h>
-
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
+#include <linux/bitfield.h>
+#include <asm/access-regs.h>
+#include <asm/fault.h>
#include <asm/gmap.h>
+#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "gaccess.h"
-#include <asm/switch_to.h>
-
-union asce {
- unsigned long val;
- struct {
- unsigned long origin : 52; /* Region- or Segment-Table Origin */
- unsigned long : 2;
- unsigned long g : 1; /* Subspace Group Control */
- unsigned long p : 1; /* Private Space Control */
- unsigned long s : 1; /* Storage-Alteration-Event Control */
- unsigned long x : 1; /* Space-Switch-Event Control */
- unsigned long r : 1; /* Real-Space Control */
- unsigned long : 1;
- unsigned long dt : 2; /* Designation-Type Control */
- unsigned long tl : 2; /* Region- or Segment-Table Length */
- };
-};
-
-enum {
- ASCE_TYPE_SEGMENT = 0,
- ASCE_TYPE_REGION3 = 1,
- ASCE_TYPE_REGION2 = 2,
- ASCE_TYPE_REGION1 = 3
-};
-
-union region1_table_entry {
- unsigned long val;
- struct {
- unsigned long rto: 52;/* Region-Table Origin */
- unsigned long : 2;
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Region-Second-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long : 1;
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Region-Second-Table Length */
- };
-};
-
-union region2_table_entry {
- unsigned long val;
- struct {
- unsigned long rto: 52;/* Region-Table Origin */
- unsigned long : 2;
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Region-Third-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long : 1;
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Region-Third-Table Length */
- };
-};
-
-struct region3_table_entry_fc0 {
- unsigned long sto: 52;/* Segment-Table Origin */
- unsigned long : 1;
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Segment-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Segment-Table Length */
-};
-
-struct region3_table_entry_fc1 {
- unsigned long rfaa : 33; /* Region-Frame Absolute Address */
- unsigned long : 14;
- unsigned long av : 1; /* ACCF-Validity Control */
- unsigned long acc: 4; /* Access-Control Bits */
- unsigned long f : 1; /* Fetch-Protection Bit */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 2;
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-union region3_table_entry {
- unsigned long val;
- struct region3_table_entry_fc0 fc0;
- struct region3_table_entry_fc1 fc1;
- struct {
- unsigned long : 53;
- unsigned long fc : 1; /* Format-Control */
- unsigned long : 4;
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
- };
-};
-
-struct segment_entry_fc0 {
- unsigned long pto: 53;/* Page-Table Origin */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 3;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-struct segment_entry_fc1 {
- unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
- unsigned long : 3;
- unsigned long av : 1; /* ACCF-Validity Control */
- unsigned long acc: 4; /* Access-Control Bits */
- unsigned long f : 1; /* Fetch-Protection Bit */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 2;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-union segment_table_entry {
- unsigned long val;
- struct segment_entry_fc0 fc0;
- struct segment_entry_fc1 fc1;
- struct {
- unsigned long : 53;
- unsigned long fc : 1; /* Format-Control */
- unsigned long : 4;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
- };
-};
-
-enum {
- TABLE_TYPE_SEGMENT = 0,
- TABLE_TYPE_REGION3 = 1,
- TABLE_TYPE_REGION2 = 2,
- TABLE_TYPE_REGION1 = 3
-};
-union page_table_entry {
- unsigned long val;
- struct {
- unsigned long pfra : 52; /* Page-Frame Real Address */
- unsigned long z : 1; /* Zero Bit */
- unsigned long i : 1; /* Page-Invalid Bit */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 8;
- };
-};
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
/*
* vaddress union in order to easily decode a virtual address into its
@@ -261,119 +107,119 @@ struct aste {
/* .. more fields there */
};
-int ipte_lock_held(struct kvm_vcpu *vcpu)
+int ipte_lock_held(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII) {
+ if (sclp.has_siif) {
int rc;
- read_lock(&vcpu->kvm->arch.sca_lock);
- rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_lock(&kvm->arch.sca_lock);
+ rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
+ read_unlock(&kvm->arch.sca_lock);
return rc;
}
- return vcpu->kvm->arch.ipte_lock_count != 0;
+ return kvm->arch.ipte_lock_count != 0;
}
-static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+static void ipte_lock_simple(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- mutex_lock(&vcpu->kvm->arch.ipte_mutex);
- vcpu->kvm->arch.ipte_lock_count++;
- if (vcpu->kvm->arch.ipte_lock_count > 1)
+ mutex_lock(&kvm->arch.ipte_mutex);
+ kvm->arch.ipte_lock_count++;
+ if (kvm->arch.ipte_lock_count > 1)
goto out;
retry:
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
if (old.k) {
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
+ read_unlock(&kvm->arch.sca_lock);
out:
- mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+ mutex_unlock(&kvm->arch.ipte_mutex);
}
-static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+static void ipte_unlock_simple(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- mutex_lock(&vcpu->kvm->arch.ipte_mutex);
- vcpu->kvm->arch.ipte_lock_count--;
- if (vcpu->kvm->arch.ipte_lock_count)
+ mutex_lock(&kvm->arch.ipte_mutex);
+ kvm->arch.ipte_lock_count--;
+ if (kvm->arch.ipte_lock_count)
goto out;
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
new = old;
new.k = 0;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
+ read_unlock(&kvm->arch.sca_lock);
+ wake_up(&kvm->arch.ipte_wq);
out:
- mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+ mutex_unlock(&kvm->arch.ipte_mutex);
}
-static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+static void ipte_lock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
retry:
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
if (old.kg) {
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
new.kh++;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
+ read_unlock(&kvm->arch.sca_lock);
}
-static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+static void ipte_unlock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
new = old;
new.kh--;
if (!new.kh)
new.k = 0;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
+ read_unlock(&kvm->arch.sca_lock);
if (!new.kh)
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ wake_up(&kvm->arch.ipte_wq);
}
-void ipte_lock(struct kvm_vcpu *vcpu)
+void ipte_lock(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII)
- ipte_lock_siif(vcpu);
+ if (sclp.has_siif)
+ ipte_lock_siif(kvm);
else
- ipte_lock_simple(vcpu);
+ ipte_lock_simple(kvm);
}
-void ipte_unlock(struct kvm_vcpu *vcpu)
+void ipte_unlock(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII)
- ipte_unlock_siif(vcpu);
+ if (sclp.has_siif)
+ ipte_unlock_siif(kvm);
else
- ipte_unlock_simple(vcpu);
+ ipte_unlock_simple(kvm);
}
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
@@ -390,7 +236,8 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
if (ar >= NUM_ACRS)
return -EINVAL;
- save_access_regs(vcpu->run->s.regs.acrs);
+ if (vcpu->arch.acrs_loaded)
+ save_access_regs(vcpu->run->s.regs.acrs);
alet.val = vcpu->run->s.regs.acrs[ar];
if (ar == 0 || alet.val == 0) {
@@ -465,61 +312,55 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
return 0;
}
-struct trans_exc_code_bits {
- unsigned long addr : 52; /* Translation-exception Address */
- unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
- unsigned long : 2;
- unsigned long b56 : 1;
- unsigned long : 3;
- unsigned long b60 : 1;
- unsigned long b61 : 1;
- unsigned long as : 2; /* ASCE Identifier */
-};
-
-enum {
- FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
- FSI_STORE = 1, /* Exception was due to store operation */
- FSI_FETCH = 2 /* Exception was due to fetch operation */
-};
-
enum prot_type {
PROT_TYPE_LA = 0,
PROT_TYPE_KEYC = 1,
PROT_TYPE_ALC = 2,
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
+ /* Dummy value for passing an initialized value when code != PGM_PROTECTION */
+ PROT_TYPE_DUMMY,
};
-static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
- u8 ar, enum gacc_mode mode, enum prot_type prot)
+static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
+ enum gacc_mode mode, enum prot_type prot, bool terminate)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- struct trans_exc_code_bits *tec;
+ union teid *teid;
memset(pgm, 0, sizeof(*pgm));
pgm->code = code;
- tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ teid = (union teid *)&pgm->trans_exc_code;
switch (code) {
case PGM_PROTECTION:
switch (prot) {
+ case PROT_TYPE_DUMMY:
+ /* We should never get here, acts like termination */
+ WARN_ON_ONCE(1);
+ break;
case PROT_TYPE_IEP:
- tec->b61 = 1;
- /* FALL THROUGH */
+ teid->b61 = 1;
+ fallthrough;
case PROT_TYPE_LA:
- tec->b56 = 1;
+ teid->b56 = 1;
break;
case PROT_TYPE_KEYC:
- tec->b60 = 1;
+ teid->b60 = 1;
break;
case PROT_TYPE_ALC:
- tec->b60 = 1;
- /* FALL THROUGH */
+ teid->b60 = 1;
+ fallthrough;
case PROT_TYPE_DAT:
- tec->b61 = 1;
+ teid->b61 = 1;
break;
}
- /* FALL THROUGH */
+ if (terminate) {
+ teid->b56 = 0;
+ teid->b60 = 0;
+ teid->b61 = 0;
+ }
+ fallthrough;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
case PGM_REGION_FIRST_TRANS:
@@ -531,10 +372,10 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
* exc_access_id has to be set to 0 for some instructions. Both
* cases have to be handled by the caller.
*/
- tec->addr = gva >> PAGE_SHIFT;
- tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
- tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
- /* FALL THROUGH */
+ teid->addr = gva >> PAGE_SHIFT;
+ teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
+ teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ fallthrough;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
case PGM_ASTE_VALIDITY:
@@ -551,6 +392,12 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
return code;
}
+static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
+ enum gacc_mode mode, enum prot_type prot)
+{
+ return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
+}
+
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
unsigned long ga, u8 ar, enum gacc_mode mode)
{
@@ -607,7 +454,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* Returns: - zero on success; @gpa contains the resulting absolute address
* - a negative value if guest access failed due to e.g. broken
* guest mapping
- * - a positve value if an access exception happened. In this case
+ * - a positive value if an access exception happened. In this case
* the returned value is the program interruption code as defined
* by the architecture
*/
@@ -630,7 +477,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
- ptr = asce.origin * PAGE_SIZE;
+ ptr = asce.rsto * PAGE_SIZE;
switch (asce.dt) {
case ASCE_TYPE_REGION1:
if (vaddr.rfx01 > asce.tl)
@@ -663,7 +510,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
case ASCE_TYPE_REGION1: {
union region1_table_entry rfte;
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rfte.val))
return -EFAULT;
@@ -677,11 +524,11 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rfte.p;
ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rste.val))
return -EFAULT;
@@ -695,11 +542,11 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rste.p;
ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rtte.val))
return -EFAULT;
@@ -723,11 +570,11 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rtte.fc0.p;
ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &ste.val))
return -EFAULT;
@@ -747,7 +594,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
}
}
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &pte.val))
return -EFAULT;
@@ -769,7 +616,7 @@ absolute_address:
*prot = PROT_TYPE_IEP;
return PGM_PROTECTION;
}
- if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
*gpa = raddr.addr;
return 0;
@@ -794,48 +641,272 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
- unsigned long *pages, unsigned long nr_pages,
- const union asce asce, enum gacc_mode mode)
+static int vm_check_access_key(struct kvm *kvm, u8 access_key,
+ enum gacc_mode mode, gpa_t gpa)
+{
+ u8 storage_key, access_control;
+ bool fetch_protected;
+ unsigned long hva;
+ int r;
+
+ if (access_key == 0)
+ return 0;
+
+ hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+
+ mmap_read_lock(current->mm);
+ r = get_guest_storage_key(current->mm, hva, &storage_key);
+ mmap_read_unlock(current->mm);
+ if (r)
+ return r;
+ access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+ if (access_control == access_key)
+ return 0;
+ fetch_protected = storage_key & _PAGE_FP_BIT;
+ if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
+ return 0;
+ return PGM_PROTECTION;
+}
+
+static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
+ union asce asce)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long override;
+
+ if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+ /* check if fetch protection override enabled */
+ override = vcpu->arch.sie_block->gcr[0];
+ override &= CR0_FETCH_PROTECTION_OVERRIDE;
+ /* not applicable if subject to DAT && private space */
+ override = override && !(psw_bits(*psw).dat && asce.p);
+ return override;
+ }
+ return false;
+}
+
+static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
+{
+ return ga < 2048 && ga + len <= 2048;
+}
+
+static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
+{
+ /* check if storage protection override enabled */
+ return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
+}
+
+static bool storage_prot_override_applies(u8 access_control)
+{
+ /* matches special storage protection override key (9) -> allow */
+ return access_control == PAGE_SPO_ACC;
+}
+
+static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
+ enum gacc_mode mode, union asce asce, gpa_t gpa,
+ unsigned long ga, unsigned int len)
+{
+ u8 storage_key, access_control;
+ unsigned long hva;
+ int r;
+
+ /* access key 0 matches any storage key -> allow */
+ if (access_key == 0)
+ return 0;
+ /*
+ * caller needs to ensure that gfn is accessible, so we can
+ * assume that this cannot fail
+ */
+ hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
+ mmap_read_lock(current->mm);
+ r = get_guest_storage_key(current->mm, hva, &storage_key);
+ mmap_read_unlock(current->mm);
+ if (r)
+ return r;
+ access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+ /* access key matches storage key -> allow */
+ if (access_control == access_key)
+ return 0;
+ if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+ /* it is a fetch and fetch protection is off -> allow */
+ if (!(storage_key & _PAGE_FP_BIT))
+ return 0;
+ if (fetch_prot_override_applicable(vcpu, mode, asce) &&
+ fetch_prot_override_applies(ga, len))
+ return 0;
+ }
+ if (storage_prot_override_applicable(vcpu) &&
+ storage_prot_override_applies(access_control))
+ return 0;
+ return PGM_PROTECTION;
+}
+
+/**
+ * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
+ * covering a logical range
+ * @vcpu: virtual cpu
+ * @ga: guest address, start of range
+ * @ar: access register
+ * @gpas: output argument, may be NULL
+ * @len: length of range in bytes
+ * @asce: address-space-control element to use for translation
+ * @mode: access mode
+ * @access_key: access key to mach the range's storage keys against
+ *
+ * Translate a logical range to a series of guest absolute addresses,
+ * such that the concatenation of page fragments starting at each gpa make up
+ * the whole range.
+ * The translation is performed as if done by the cpu for the given @asce, @ar,
+ * @mode and state of the @vcpu.
+ * If the translation causes an exception, its program interruption code is
+ * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
+ * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
+ * a correct exception into the guest.
+ * The resulting gpas are stored into @gpas, unless it is NULL.
+ *
+ * Note: All fragments except the first one start at the beginning of a page.
+ * When deriving the boundaries of a fragment from a gpa, all but the last
+ * fragment end at the end of the page.
+ *
+ * Return:
+ * * 0 - success
+ * * <0 - translation could not be performed, for example if guest
+ * memory could not be accessed
+ * * >0 - an access exception occurred. In this case the returned value
+ * is the program interruption code and the contents of pgm may
+ * be used to inject an exception into the guest.
+ */
+static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ unsigned long *gpas, unsigned long len,
+ const union asce asce, enum gacc_mode mode,
+ u8 access_key)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned int offset = offset_in_page(ga);
+ unsigned int fragment_len;
int lap_enabled, rc = 0;
enum prot_type prot;
+ unsigned long gpa;
lap_enabled = low_address_protection_enabled(vcpu, asce);
- while (nr_pages) {
+ while (min(PAGE_SIZE - offset, len) > 0) {
+ fragment_len = min(PAGE_SIZE - offset, len);
ga = kvm_s390_logical_to_effective(vcpu, ga);
if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
PROT_TYPE_LA);
- ga &= PAGE_MASK;
if (psw_bits(*psw).dat) {
- rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
+ rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
if (rc < 0)
return rc;
} else {
- *pages = kvm_s390_real_to_abs(vcpu, ga);
- if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ gpa = kvm_s390_real_to_abs(vcpu, ga);
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
+ prot = PROT_TYPE_DUMMY;
+ }
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
- ga += PAGE_SIZE;
- pages++;
- nr_pages--;
+ rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
+ fragment_len);
+ if (rc)
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
+ if (gpas)
+ *gpas++ = gpa;
+ offset = 0;
+ ga += fragment_len;
+ len -= fragment_len;
+ }
+ return 0;
+}
+
+static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
+ void *data, unsigned int len)
+{
+ const unsigned int offset = offset_in_page(gpa);
+ const gfn_t gfn = gpa_to_gfn(gpa);
+ int rc;
+
+ if (!gfn_to_memslot(kvm, gfn))
+ return PGM_ADDRESSING;
+ if (mode == GACC_STORE)
+ rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
+ else
+ rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
+ return rc;
+}
+
+static int
+access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
+ void *data, unsigned int len, u8 access_key)
+{
+ struct kvm_memory_slot *slot;
+ bool writable;
+ gfn_t gfn;
+ hva_t hva;
+ int rc;
+
+ gfn = gpa >> PAGE_SHIFT;
+ slot = gfn_to_memslot(kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+ /*
+ * Check if it's a ro memslot, even tho that can't occur (they're unsupported).
+ * Don't try to actually handle that case.
+ */
+ if (!writable && mode == GACC_STORE)
+ return -EOPNOTSUPP;
+ hva += offset_in_page(gpa);
+ if (mode == GACC_STORE)
+ rc = copy_to_user_key((void __user *)hva, data, len, access_key);
+ else
+ rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
+ if (rc)
+ return PGM_PROTECTION;
+ if (mode == GACC_STORE)
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ return 0;
+}
+
+int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
+ unsigned long len, enum gacc_mode mode, u8 access_key)
+{
+ int offset = offset_in_page(gpa);
+ int fragment_len;
+ int rc;
+
+ while (min(PAGE_SIZE - offset, len) > 0) {
+ fragment_len = min(PAGE_SIZE - offset, len);
+ rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
+ if (rc)
+ return rc;
+ offset = 0;
+ len -= fragment_len;
+ data += fragment_len;
+ gpa += fragment_len;
}
return 0;
}
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
- unsigned long len, enum gacc_mode mode)
+int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, enum gacc_mode mode,
+ u8 access_key)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- unsigned long _len, nr_pages, gpa, idx;
- unsigned long pages_array[2];
- unsigned long *pages;
+ unsigned long nr_pages, idx;
+ unsigned long gpa_array[2];
+ unsigned int fragment_len;
+ unsigned long *gpas;
+ enum prot_type prot;
int need_ipte_lock;
union asce asce;
+ bool try_storage_prot_override;
+ bool try_fetch_prot_override;
int rc;
if (!len)
@@ -845,55 +916,201 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
- pages = pages_array;
- if (nr_pages > ARRAY_SIZE(pages_array))
- pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
- if (!pages)
+ gpas = gpa_array;
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
+ if (!gpas)
return -ENOMEM;
+ try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
+ try_storage_prot_override = storage_prot_override_applicable(vcpu);
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
- ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
- for (idx = 0; idx < nr_pages && !rc; idx++) {
- gpa = *(pages + idx) + (ga & ~PAGE_MASK);
- _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (mode == GACC_STORE)
- rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+ ipte_lock(vcpu->kvm);
+ /*
+ * Since we do the access further down ultimately via a move instruction
+ * that does key checking and returns an error in case of a protection
+ * violation, we don't need to do the check during address translation.
+ * Skip it by passing access key 0, which matches any storage key,
+ * obviating the need for any further checks. As a result the check is
+ * handled entirely in hardware on access, we only need to take care to
+ * forego key protection checking if fetch protection override applies or
+ * retry with the special key 9 in case of storage protection override.
+ */
+ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
+ if (rc)
+ goto out_unlock;
+ for (idx = 0; idx < nr_pages; idx++) {
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
+ if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
+ rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len);
+ } else {
+ rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len, access_key);
+ }
+ if (rc == PGM_PROTECTION && try_storage_prot_override)
+ rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len, PAGE_SPO_ACC);
+ if (rc)
+ break;
+ len -= fragment_len;
+ data += fragment_len;
+ ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
+ }
+ if (rc > 0) {
+ bool terminate = (mode == GACC_STORE) && (idx > 0);
+
+ if (rc == PGM_PROTECTION)
+ prot = PROT_TYPE_KEYC;
else
- rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
- len -= _len;
- ga += _len;
- data += _len;
+ prot = PROT_TYPE_DUMMY;
+ rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
+out_unlock:
if (need_ipte_lock)
- ipte_unlock(vcpu);
- if (nr_pages > ARRAY_SIZE(pages_array))
- vfree(pages);
+ ipte_unlock(vcpu->kvm);
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ vfree(gpas);
return rc;
}
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode)
{
- unsigned long _len, gpa;
+ unsigned int fragment_len;
+ unsigned long gpa;
int rc = 0;
while (len && !rc) {
gpa = kvm_s390_real_to_abs(vcpu, gra);
- _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (mode)
- rc = write_guest_abs(vcpu, gpa, data, _len);
- else
- rc = read_guest_abs(vcpu, gpa, data, _len);
- len -= _len;
- gra += _len;
- data += _len;
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
+ rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
+ len -= fragment_len;
+ gra += fragment_len;
+ data += fragment_len;
}
+ if (rc > 0)
+ vcpu->arch.pgm.code = rc;
return rc;
}
/**
- * guest_translate_address - translate guest logical into guest absolute address
+ * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
+ * @kvm: Virtual machine instance.
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ * non power of two will result in failure.
+ * @old_addr: Pointer to old value. If the location at @gpa contains this value,
+ * the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
+ * *@old_addr contains the value at @gpa before the attempt to
+ * exchange the value.
+ * @new: The value to place at @gpa.
+ * @access_key: The access key to use for the guest access.
+ * @success: output value indicating if an exchange occurred.
+ *
+ * Atomically exchange the value at @gpa by @new, if it contains *@old.
+ * Honors storage keys.
+ *
+ * Return: * 0: successful exchange
+ * * >0: a program interruption code indicating the reason cmpxchg could
+ * not be attempted
+ * * -EINVAL: address misaligned or len not power of two
+ * * -EAGAIN: transient failure (len 1 or 2)
+ * * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+ __uint128_t *old_addr, __uint128_t new,
+ u8 access_key, bool *success)
+{
+ gfn_t gfn = gpa_to_gfn(gpa);
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ bool writable;
+ hva_t hva;
+ int ret;
+
+ if (!IS_ALIGNED(gpa, len))
+ return -EINVAL;
+
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+ /*
+ * Check if it's a read-only memslot, even though that cannot occur
+ * since those are unsupported.
+ * Don't try to actually handle that case.
+ */
+ if (!writable)
+ return -EOPNOTSUPP;
+
+ hva += offset_in_page(gpa);
+ /*
+ * The cmpxchg_user_key macro depends on the type of "old", so we need
+ * a case for each valid length and get some code duplication as long
+ * as we don't introduce a new macro.
+ */
+ switch (len) {
+ case 1: {
+ u8 old;
+
+ ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 2: {
+ u16 old;
+
+ ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 4: {
+ u32 old;
+
+ ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 8: {
+ u64 old;
+
+ ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 16: {
+ __uint128_t old;
+
+ ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ if (*success)
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ /*
+ * Assume that the fault is caused by protection, either key protection
+ * or user page write protection.
+ */
+ if (ret == -EFAULT)
+ ret = PGM_PROTECTION;
+ return ret;
+}
+
+/**
+ * guest_translate_address_with_key - translate guest logical into guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @gpa: Guest physical address
+ * @mode: Translation access mode
+ * @access_key: access key to mach the storage key with
*
* Parameter semantics are the same as the ones from guest_translate.
* The memory contents at the guest address are not changed.
@@ -901,11 +1118,10 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* Note: The IPTE lock is not taken during this function, so the caller
* has to take care of this.
*/
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long *gpa, enum gacc_mode mode)
+int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long *gpa, enum gacc_mode mode,
+ u8 access_key)
{
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- enum prot_type prot;
union asce asce;
int rc;
@@ -913,49 +1129,62 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
- if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (mode == GACC_STORE)
- return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
- mode, PROT_TYPE_LA);
- }
+ return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
+ access_key);
+}
- if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
- if (rc > 0)
- return trans_exc(vcpu, rc, gva, 0, mode, prot);
- } else {
- *gpa = kvm_s390_real_to_abs(vcpu, gva);
- if (kvm_is_error_gpa(vcpu->kvm, *gpa))
- return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
- }
+/**
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @length: Length of test range
+ * @mode: Translation access mode
+ * @access_key: access key to mach the storage keys with
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long length, enum gacc_mode mode, u8 access_key)
+{
+ union asce asce;
+ int rc = 0;
+
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+ if (rc)
+ return rc;
+ ipte_lock(vcpu->kvm);
+ rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
+ access_key);
+ ipte_unlock(vcpu->kvm);
return rc;
}
/**
- * check_gva_range - test a range of guest virtual addresses for accessibility
+ * check_gpa_range - test a range of guest physical addresses for accessibility
+ * @kvm: virtual machine instance
+ * @gpa: guest physical address
+ * @length: length of test range
+ * @mode: access mode to test, relevant for storage keys
+ * @access_key: access key to mach the storage keys with
*/
-int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long length, enum gacc_mode mode)
+int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
+ enum gacc_mode mode, u8 access_key)
{
- unsigned long gpa;
- unsigned long currlen;
+ unsigned int fragment_len;
int rc = 0;
- ipte_lock(vcpu);
- while (length > 0 && !rc) {
- currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
- rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
- gva += currlen;
- length -= currlen;
+ while (length && !rc) {
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
+ rc = vm_check_access_key(kvm, access_key, mode, gpa);
+ length -= fragment_len;
+ gpa += fragment_len;
}
- ipte_unlock(vcpu);
-
return rc;
}
/**
* kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @vcpu: virtual cpu
* @gra: Guest real address
*
* Checks whether an address is subject to low-address protection and set
@@ -976,13 +1205,17 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
* kvm_s390_shadow_tables - walk the guest page table and create shadow tables
* @sg: pointer to the shadow guest address space structure
* @saddr: faulting address in the shadow gmap
- * @pgt: pointer to the page table address result
+ * @pgt: pointer to the beginning of the page table for the given address if
+ * successful (return value 0), or to the first invalid DAT entry in
+ * case of exceptions (return value > 0)
+ * @dat_protection: referenced memory is write protected
* @fake: pgt references contiguous guest memory block, not a pgtable
*/
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
unsigned long *pgt, int *dat_protection,
int *fake)
{
+ struct kvm *kvm;
struct gmap *parent;
union asce asce;
union vaddress vaddr;
@@ -991,10 +1224,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
*fake = 0;
*dat_protection = 0;
+ kvm = sg->private;
parent = sg->parent;
vaddr.addr = saddr;
asce.val = sg->orig_asce;
- ptr = asce.origin * PAGE_SIZE;
+ ptr = asce.rsto * PAGE_SIZE;
if (asce.r) {
*fake = 1;
ptr = 0;
@@ -1034,6 +1268,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
rfte.val = ptr;
goto shadow_r2t;
}
+ *pgt = ptr + vaddr.rfx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
if (rc)
return rc;
@@ -1050,7 +1285,9 @@ shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ kvm->stat.gmap_shadow_r1_entry++;
+ }
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -1059,6 +1296,7 @@ shadow_r2t:
rste.val = ptr;
goto shadow_r3t;
}
+ *pgt = ptr + vaddr.rsx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
if (rc)
return rc;
@@ -1076,7 +1314,9 @@ shadow_r3t:
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ kvm->stat.gmap_shadow_r2_entry++;
+ }
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -1085,6 +1325,7 @@ shadow_r3t:
rtte.val = ptr;
goto shadow_sgt;
}
+ *pgt = ptr + vaddr.rtx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
if (rc)
return rc;
@@ -1111,7 +1352,9 @@ shadow_sgt:
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ kvm->stat.gmap_shadow_r3_entry++;
+ }
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
@@ -1120,6 +1363,7 @@ shadow_sgt:
ste.val = ptr;
goto shadow_pgt;
}
+ *pgt = ptr + vaddr.sx * 8;
rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
if (rc)
return rc;
@@ -1142,6 +1386,7 @@ shadow_pgt:
rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_sg_entry++;
}
}
/* Return the parent address of the page table */
@@ -1150,10 +1395,50 @@ shadow_pgt:
}
/**
+ * shadow_pgt_lookup() - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow aguest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
+ int *dat_protection, int *fake)
+{
+ unsigned long pt_index;
+ unsigned long *table;
+ struct page *page;
+ int rc;
+
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+ /* Shadow page tables are full pages (pte+pgste) */
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
+ *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
+ *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+ *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
+ rc = 0;
+ } else {
+ rc = -EAGAIN;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+}
+
+/**
* kvm_s390_shadow_fault - handle fault on a shadow page table
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
* @saddr: faulting address in the shadow gmap
+ * @datptr: will contain the address of the faulting DAT table entry, or of
+ * the valid leaf, plus some flags
*
* Returns: - 0 if the shadow fault was successfully resolved
* - > 0 (pgm exception code) on exceptions while faulting
@@ -1162,23 +1447,26 @@ shadow_pgt:
* - -ENOMEM if out of memory
*/
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
- unsigned long saddr)
+ unsigned long saddr, unsigned long *datptr)
{
union vaddress vaddr;
union page_table_entry pte;
- unsigned long pgt;
+ unsigned long pgt = 0;
int dat_protection, fake;
int rc;
- down_read(&sg->mm->mmap_sem);
+ if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
+ return -EFAULT;
+
+ mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
* tables/pointers we read stay valid - unshadowing is however
* always possible - only guest_table_lock protects us.
*/
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
- rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
&fake);
@@ -1188,8 +1476,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
pte.val = pgt + vaddr.px * PAGE_SIZE;
goto shadow_page;
}
- if (!rc)
- rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+
+ switch (rc) {
+ case PGM_SEGMENT_TRANSLATION:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_FIRST_TRANS:
+ pgt |= PEI_NOT_PTE;
+ break;
+ case 0:
+ pgt += vaddr.px * 8;
+ rc = gmap_read_table(sg->parent, pgt, &pte.val);
+ }
+ if (datptr)
+ *datptr = pgt | dat_protection * PEI_DAT_PROT;
if (!rc && pte.i)
rc = PGM_PAGE_TRANSLATION;
if (!rc && pte.z)
@@ -1198,7 +1498,8 @@ shadow_page:
pte.p |= dat_protection;
if (!rc)
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
- ipte_unlock(vcpu);
- up_read(&sg->mm->mmap_sem);
+ vcpu->kvm->stat.gmap_shadow_pg_entry++;
+ ipte_unlock(vcpu->kvm);
+ mmap_read_unlock(sg->mm);
return rc;
}