From 72e1ad4200d5ed5c5adf120b77fb2900a29a48e5 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 19 Sep 2017 12:34:06 +0200
Subject: KVM: s390: document memory ordering for kvm_s390_vcpu_wakeup

swait_active does not enforce any ordering and it can therefore trigger
some subtle races when the CPU moves the read for the check before a
previous store and that store is then used on another CPU that is
preparing the swait.

On s390 there is a call to swait_active in kvm_s390_vcpu_wakeup. The
good thing is, on s390 all potential races cannot happen because all
callers of kvm_s390_vcpu_wakeup do not store (no race) or use an atomic
operation, which handles memory ordering. Since this is not guaranteed
by the Linux semantics (but by the implementation on s390) let's add
smp_mb_after_atomic to make this obvious and document the ordering.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/interrupt.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a832ad031cee..23d8fb25c5dd 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1074,6 +1074,12 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 	 * in kvm_vcpu_block without having the waitqueue set (polling)
 	 */
 	vcpu->valid_wakeup = true;
+	/*
+	 * This is mostly to document, that the read in swait_active could
+	 * be moved before other stores, leading to subtle races.
+	 * All current users do not store or use an atomic like update
+	 */
+	smp_mb__after_atomic();
 	if (swait_active(&vcpu->wq)) {
 		/*
 		 * The vcpu gave up the cpu voluntarily, mark it as a good
-- 
cgit 


From ba850a8e64fbbd5f6407d5931124d00ced0528cc Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
Date: Fri, 13 Oct 2017 13:38:46 -0400
Subject: KVM: s390: SIE considerations for AP Queue virtualization

The Crypto Control Block (CRYCB) is referenced by the SIE state
description and controls KVM guest access to the Adjunct
Processor (AP) adapters, usage domains and control domains.
This patch defines the AP control blocks to be used for
controlling guest access to the AP adapters and domains.

Signed-off-by: Tony Krowiak <akrowiak@linux.vnet.ibm.com>
Message-Id: <1507916344-3896-2-git-send-email-akrowiak@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index fd006a272024..f3a9b5a445b6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -685,11 +685,28 @@ struct kvm_s390_crypto {
 	__u8 dea_kw;
 };
 
+#define APCB0_MASK_SIZE 1
+struct kvm_s390_apcb0 {
+	__u64 apm[APCB0_MASK_SIZE];		/* 0x0000 */
+	__u64 aqm[APCB0_MASK_SIZE];		/* 0x0008 */
+	__u64 adm[APCB0_MASK_SIZE];		/* 0x0010 */
+	__u64 reserved18;			/* 0x0018 */
+};
+
+#define APCB1_MASK_SIZE 4
+struct kvm_s390_apcb1 {
+	__u64 apm[APCB1_MASK_SIZE];		/* 0x0000 */
+	__u64 aqm[APCB1_MASK_SIZE];		/* 0x0020 */
+	__u64 adm[APCB1_MASK_SIZE];		/* 0x0040 */
+	__u64 reserved60[4];			/* 0x0060 */
+};
+
 struct kvm_s390_crypto_cb {
-	__u8    reserved00[72];                 /* 0x0000 */
-	__u8    dea_wrapping_key_mask[24];      /* 0x0048 */
-	__u8    aes_wrapping_key_mask[32];      /* 0x0060 */
-	__u8    reserved80[128];                /* 0x0080 */
+	struct kvm_s390_apcb0 apcb0;		/* 0x0000 */
+	__u8   reserved20[0x0048 - 0x0020];	/* 0x0020 */
+	__u8   dea_wrapping_key_mask[24];	/* 0x0048 */
+	__u8   aes_wrapping_key_mask[32];	/* 0x0060 */
+	struct kvm_s390_apcb1 apcb1;		/* 0x0080 */
 };
 
 /*
-- 
cgit 


From f7a6509fe002e3909cb41c09e807b7f3ca4a361b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 1 Sep 2017 17:11:43 +0200
Subject: KVM: s390: vsie: use common code functions for pinning

We will not see -ENOMEM (gfn_to_hva() will return KVM_ERR_PTR_BAD_PAGE
for all errors). So we can also get rid of special handling in the
callers of pin_guest_page() and always assume that it is a g2 error.

As also kvm_s390_inject_program_int() should never fail, we can
simplify pin_scb(), too.

Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20170901151143.22714-1-david@redhat.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/vsie.c | 50 ++++++++++++++++++--------------------------------
 1 file changed, 18 insertions(+), 32 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index b18b5652e5c5..a311938b63b3 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -443,22 +443,14 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
  *
  * Returns: - 0 on success
  *          - -EINVAL if the gpa is not valid guest storage
- *          - -ENOMEM if out of memory
  */
 static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
 {
 	struct page *page;
-	hva_t hva;
-	int rc;
 
-	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
-	if (kvm_is_error_hva(hva))
+	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
+	if (is_error_page(page))
 		return -EINVAL;
-	rc = get_user_pages_fast(hva, 1, 1, &page);
-	if (rc < 0)
-		return rc;
-	else if (rc != 1)
-		return -ENOMEM;
 	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
 	return 0;
 }
@@ -466,11 +458,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
 /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
 static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
 {
-	struct page *page;
-
-	page = virt_to_page(hpa);
-	set_page_dirty_lock(page);
-	put_page(page);
+	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
 	/* mark the page always as dirty for migration */
 	mark_page_dirty(kvm, gpa_to_gfn(gpa));
 }
@@ -557,7 +545,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			rc = set_validity_icpt(scb_s, 0x003bU);
 		if (!rc) {
 			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-			if (rc == -EINVAL)
+			if (rc)
 				rc = set_validity_icpt(scb_s, 0x0034U);
 		}
 		if (rc)
@@ -574,10 +562,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		}
 		/* 256 bytes cannot cross page boundaries */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x0080U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->itdba = hpa;
 	}
 
@@ -592,10 +580,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		 * if this block gets bigger, we have to shadow it.
 		 */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x1310U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->gvrd = hpa;
 	}
 
@@ -607,11 +595,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		}
 		/* 64 bytes cannot cross page boundaries */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x0043U);
-		/* Validity 0x0044 will be checked by SIE */
-		if (rc)
 			goto unpin;
+		}
+		/* Validity 0x0044 will be checked by SIE */
 		scb_s->riccbd = hpa;
 	}
 	if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
@@ -635,10 +623,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		 * cross page boundaries
 		 */
 		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-		if (rc == -EINVAL)
+		if (rc) {
 			rc = set_validity_icpt(scb_s, 0x10b0U);
-		if (rc)
 			goto unpin;
+		}
 		scb_s->sdnxo = hpa | sdnxc;
 	}
 	return 0;
@@ -663,7 +651,6 @@ static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
  *
  * Returns: - 0 if the scb was pinned.
  *          - > 0 if control has to be given to guest 2
- *          - -ENOMEM if out of memory
  */
 static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 		   gpa_t gpa)
@@ -672,14 +659,13 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 	int rc;
 
 	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
-	if (rc == -EINVAL) {
+	if (rc) {
 		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		if (!rc)
-			rc = 1;
+		WARN_ON_ONCE(rc);
+		return 1;
 	}
-	if (!rc)
-		vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
-	return rc;
+	vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+	return 0;
 }
 
 /*
-- 
cgit 


From ee739f4b216e9394281cf99e6d93c67bdf4a37d2 Mon Sep 17 00:00:00 2001
From: Michael Mueller <mimu@linux.vnet.ibm.com>
Date: Mon, 3 Jul 2017 15:32:50 +0200
Subject: KVM: s390: abstract conversion between isc and enum irq_types

The abstraction of the conversion between an isc value and an irq_type
by means of functions isc_to_irq_type() and irq_type_to_isc() allows
to clarify the respective operations where used.

Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/interrupt.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 23d8fb25c5dd..a3da4f3065aa 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -213,6 +213,16 @@ static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
 	       vcpu->arch.local_int.pending_irqs;
 }
 
+static inline int isc_to_irq_type(unsigned long isc)
+{
+	return IRQ_PEND_IO_ISC_0 + isc;
+}
+
+static inline int irq_type_to_isc(unsigned long irq_type)
+{
+	return irq_type - IRQ_PEND_IO_ISC_0;
+}
+
 static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
 				   unsigned long active_mask)
 {
@@ -220,7 +230,7 @@ static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
 
 	for (i = 0; i <= MAX_ISC; i++)
 		if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
-			active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+			active_mask &= ~(1UL << (isc_to_irq_type(i)));
 
 	return active_mask;
 }
@@ -901,7 +911,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 	fi = &vcpu->kvm->arch.float_int;
 
 	spin_lock(&fi->lock);
-	isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+	isc_list = &fi->lists[irq_type_to_isc(irq_type)];
 	inti = list_first_entry_or_null(isc_list,
 					struct kvm_s390_interrupt_info,
 					list);
@@ -1401,7 +1411,7 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
 		list_del_init(&iter->list);
 		fi->counters[FIRQ_CNTR_IO] -= 1;
 		if (list_empty(isc_list))
-			clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+			clear_bit(isc_to_irq_type(isc), &fi->pending_irqs);
 		spin_unlock(&fi->lock);
 		return iter;
 	}
@@ -1528,7 +1538,7 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	isc = int_word_to_isc(inti->io.io_int_word);
 	list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
 	list_add_tail(&inti->list, list);
-	set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+	set_bit(isc_to_irq_type(isc), &fi->pending_irqs);
 	spin_unlock(&fi->lock);
 	return 0;
 }
-- 
cgit 


From 4dd6f17eb913d3d23dd6c07950627ac2c3068dca Mon Sep 17 00:00:00 2001
From: Michael Mueller <mimu@linux.vnet.ibm.com>
Date: Thu, 6 Jul 2017 14:22:20 +0200
Subject: KVM: s390: clear_io_irq() requests are not expected for adapter
 interrupts

There is a chance to delete not yet delivered I/O interrupts if an
exploiter uses the subsystem identification word 0x0000 while
processing a KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl. -EINVAL will be returned
now instead in that case.

Classic interrupts will always have bit 0x10000 set in the schid while
adapter interrupts have a zero schid. The clear_io_irq interface is
only useful for classic interrupts (as adapter interrupts belong to
many devices). Let's make this interface more strict and forbid a schid
of 0.

Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/interrupt.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a3da4f3065aa..c8aacced23fb 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2191,6 +2191,8 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
 		return -EINVAL;
 	if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid)))
 		return -EFAULT;
+	if (!schid)
+		return -EINVAL;
 	kfree(kvm_s390_get_io_int(kvm, isc_mask, schid));
 	/*
 	 * If userspace is conforming to the architecture, we can have at most
-- 
cgit 


From da9a1446d248f673a8560ce46251ff620214ab7b Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 9 Nov 2017 10:00:45 +0100
Subject: KVM: s390: provide a capability for AIS state migration

The AIS capability was introduced in 4.12, while the interface to
migrate the state was added in 4.13. Unfortunately it is not possible
for userspace to detect the migration capability without creating a flic
kvm device. As in QEMU the cpu model detection runs on the "none"
machine this will result in cpu model issues regarding the "ais"
capability.

To get the "ais" capability properly let's add a new KVM capability that
tells userspace that AIS states can be migrated.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Halil Pasic <pasic@linux.vnet.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index de6a5b790da0..8f4b655f65d7 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -395,6 +395,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_USER_INSTR0:
 	case KVM_CAP_S390_CMMA_MIGRATION:
 	case KVM_CAP_S390_AIS:
+	case KVM_CAP_S390_AIS_MIGRATION:
 		r = 1;
 		break;
 	case KVM_CAP_S390_MEM_OP:
-- 
cgit