From 72e1ad4200d5ed5c5adf120b77fb2900a29a48e5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 19 Sep 2017 12:34:06 +0200 Subject: KVM: s390: document memory ordering for kvm_s390_vcpu_wakeup swait_active does not enforce any ordering and it can therefore trigger some subtle races when the CPU moves the read for the check before a previous store and that store is then used on another CPU that is preparing the swait. On s390 there is a call to swait_active in kvm_s390_vcpu_wakeup. The good thing is, on s390 all potential races cannot happen because all callers of kvm_s390_vcpu_wakeup do not store (no race) or use an atomic operation, which handles memory ordering. Since this is not guaranteed by the Linux semantics (but by the implementation on s390) let's add smp_mb_after_atomic to make this obvious and document the ordering. Suggested-by: Paolo Bonzini Acked-by: Halil Pasic Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a832ad031cee..23d8fb25c5dd 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1074,6 +1074,12 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) * in kvm_vcpu_block without having the waitqueue set (polling) */ vcpu->valid_wakeup = true; + /* + * This is mostly to document, that the read in swait_active could + * be moved before other stores, leading to subtle races. + * All current users do not store or use an atomic like update + */ + smp_mb__after_atomic(); if (swait_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, mark it as a good -- cgit From ba850a8e64fbbd5f6407d5931124d00ced0528cc Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Fri, 13 Oct 2017 13:38:46 -0400 Subject: KVM: s390: SIE considerations for AP Queue virtualization The Crypto Control Block (CRYCB) is referenced by the SIE state description and controls KVM guest access to the Adjunct Processor (AP) adapters, usage domains and control domains. This patch defines the AP control blocks to be used for controlling guest access to the AP adapters and domains. Signed-off-by: Tony Krowiak Message-Id: <1507916344-3896-2-git-send-email-akrowiak@linux.vnet.ibm.com> Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index fd006a272024..f3a9b5a445b6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -685,11 +685,28 @@ struct kvm_s390_crypto { __u8 dea_kw; }; +#define APCB0_MASK_SIZE 1 +struct kvm_s390_apcb0 { + __u64 apm[APCB0_MASK_SIZE]; /* 0x0000 */ + __u64 aqm[APCB0_MASK_SIZE]; /* 0x0008 */ + __u64 adm[APCB0_MASK_SIZE]; /* 0x0010 */ + __u64 reserved18; /* 0x0018 */ +}; + +#define APCB1_MASK_SIZE 4 +struct kvm_s390_apcb1 { + __u64 apm[APCB1_MASK_SIZE]; /* 0x0000 */ + __u64 aqm[APCB1_MASK_SIZE]; /* 0x0020 */ + __u64 adm[APCB1_MASK_SIZE]; /* 0x0040 */ + __u64 reserved60[4]; /* 0x0060 */ +}; + struct kvm_s390_crypto_cb { - __u8 reserved00[72]; /* 0x0000 */ - __u8 dea_wrapping_key_mask[24]; /* 0x0048 */ - __u8 aes_wrapping_key_mask[32]; /* 0x0060 */ - __u8 reserved80[128]; /* 0x0080 */ + struct kvm_s390_apcb0 apcb0; /* 0x0000 */ + __u8 reserved20[0x0048 - 0x0020]; /* 0x0020 */ + __u8 dea_wrapping_key_mask[24]; /* 0x0048 */ + __u8 aes_wrapping_key_mask[32]; /* 0x0060 */ + struct kvm_s390_apcb1 apcb1; /* 0x0080 */ }; /* -- cgit From f7a6509fe002e3909cb41c09e807b7f3ca4a361b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 1 Sep 2017 17:11:43 +0200 Subject: KVM: s390: vsie: use common code functions for pinning We will not see -ENOMEM (gfn_to_hva() will return KVM_ERR_PTR_BAD_PAGE for all errors). So we can also get rid of special handling in the callers of pin_guest_page() and always assume that it is a g2 error. As also kvm_s390_inject_program_int() should never fail, we can simplify pin_scb(), too. Signed-off-by: David Hildenbrand Message-Id: <20170901151143.22714-1-david@redhat.com> Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/vsie.c | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index b18b5652e5c5..a311938b63b3 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -443,22 +443,14 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * * Returns: - 0 on success * - -EINVAL if the gpa is not valid guest storage - * - -ENOMEM if out of memory */ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) { struct page *page; - hva_t hva; - int rc; - hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); - if (kvm_is_error_hva(hva)) + page = gfn_to_page(kvm, gpa_to_gfn(gpa)); + if (is_error_page(page)) return -EINVAL; - rc = get_user_pages_fast(hva, 1, 1, &page); - if (rc < 0) - return rc; - else if (rc != 1) - return -ENOMEM; *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK); return 0; } @@ -466,11 +458,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa) { - struct page *page; - - page = virt_to_page(hpa); - set_page_dirty_lock(page); - put_page(page); + kvm_release_pfn_dirty(hpa >> PAGE_SHIFT); /* mark the page always as dirty for migration */ mark_page_dirty(kvm, gpa_to_gfn(gpa)); } @@ -557,7 +545,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) rc = set_validity_icpt(scb_s, 0x003bU); if (!rc) { rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) rc = set_validity_icpt(scb_s, 0x0034U); } if (rc) @@ -574,10 +562,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } /* 256 bytes cannot cross page boundaries */ rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) { rc = set_validity_icpt(scb_s, 0x0080U); - if (rc) goto unpin; + } scb_s->itdba = hpa; } @@ -592,10 +580,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * if this block gets bigger, we have to shadow it. */ rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) { rc = set_validity_icpt(scb_s, 0x1310U); - if (rc) goto unpin; + } scb_s->gvrd = hpa; } @@ -607,11 +595,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } /* 64 bytes cannot cross page boundaries */ rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) { rc = set_validity_icpt(scb_s, 0x0043U); - /* Validity 0x0044 will be checked by SIE */ - if (rc) goto unpin; + } + /* Validity 0x0044 will be checked by SIE */ scb_s->riccbd = hpa; } if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { @@ -635,10 +623,10 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * cross page boundaries */ rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) + if (rc) { rc = set_validity_icpt(scb_s, 0x10b0U); - if (rc) goto unpin; + } scb_s->sdnxo = hpa | sdnxc; } return 0; @@ -663,7 +651,6 @@ static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, * * Returns: - 0 if the scb was pinned. * - > 0 if control has to be given to guest 2 - * - -ENOMEM if out of memory */ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, gpa_t gpa) @@ -672,14 +659,13 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, int rc; rc = pin_guest_page(vcpu->kvm, gpa, &hpa); - if (rc == -EINVAL) { + if (rc) { rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - if (!rc) - rc = 1; + WARN_ON_ONCE(rc); + return 1; } - if (!rc) - vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; - return rc; + vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; + return 0; } /* -- cgit From ee739f4b216e9394281cf99e6d93c67bdf4a37d2 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Mon, 3 Jul 2017 15:32:50 +0200 Subject: KVM: s390: abstract conversion between isc and enum irq_types The abstraction of the conversion between an isc value and an irq_type by means of functions isc_to_irq_type() and irq_type_to_isc() allows to clarify the respective operations where used. Signed-off-by: Michael Mueller Reviewed-by: Halil Pasic Reviewed-by: Pierre Morel Reviewed-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 23d8fb25c5dd..a3da4f3065aa 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -213,6 +213,16 @@ static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) vcpu->arch.local_int.pending_irqs; } +static inline int isc_to_irq_type(unsigned long isc) +{ + return IRQ_PEND_IO_ISC_0 + isc; +} + +static inline int irq_type_to_isc(unsigned long irq_type) +{ + return irq_type - IRQ_PEND_IO_ISC_0; +} + static unsigned long disable_iscs(struct kvm_vcpu *vcpu, unsigned long active_mask) { @@ -220,7 +230,7 @@ static unsigned long disable_iscs(struct kvm_vcpu *vcpu, for (i = 0; i <= MAX_ISC; i++) if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i))) - active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i)); + active_mask &= ~(1UL << (isc_to_irq_type(i))); return active_mask; } @@ -901,7 +911,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, fi = &vcpu->kvm->arch.float_int; spin_lock(&fi->lock); - isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0]; + isc_list = &fi->lists[irq_type_to_isc(irq_type)]; inti = list_first_entry_or_null(isc_list, struct kvm_s390_interrupt_info, list); @@ -1401,7 +1411,7 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm, list_del_init(&iter->list); fi->counters[FIRQ_CNTR_IO] -= 1; if (list_empty(isc_list)) - clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); + clear_bit(isc_to_irq_type(isc), &fi->pending_irqs); spin_unlock(&fi->lock); return iter; } @@ -1528,7 +1538,7 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) isc = int_word_to_isc(inti->io.io_int_word); list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; list_add_tail(&inti->list, list); - set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); + set_bit(isc_to_irq_type(isc), &fi->pending_irqs); spin_unlock(&fi->lock); return 0; } -- cgit From 4dd6f17eb913d3d23dd6c07950627ac2c3068dca Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Thu, 6 Jul 2017 14:22:20 +0200 Subject: KVM: s390: clear_io_irq() requests are not expected for adapter interrupts There is a chance to delete not yet delivered I/O interrupts if an exploiter uses the subsystem identification word 0x0000 while processing a KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl. -EINVAL will be returned now instead in that case. Classic interrupts will always have bit 0x10000 set in the schid while adapter interrupts have a zero schid. The clear_io_irq interface is only useful for classic interrupts (as adapter interrupts belong to many devices). Let's make this interface more strict and forbid a schid of 0. Signed-off-by: Michael Mueller Reviewed-by: Halil Pasic Reviewed-by: Christian Borntraeger Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a3da4f3065aa..c8aacced23fb 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2191,6 +2191,8 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid))) return -EFAULT; + if (!schid) + return -EINVAL; kfree(kvm_s390_get_io_int(kvm, isc_mask, schid)); /* * If userspace is conforming to the architecture, we can have at most -- cgit From da9a1446d248f673a8560ce46251ff620214ab7b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 9 Nov 2017 10:00:45 +0100 Subject: KVM: s390: provide a capability for AIS state migration The AIS capability was introduced in 4.12, while the interface to migrate the state was added in 4.13. Unfortunately it is not possible for userspace to detect the migration capability without creating a flic kvm device. As in QEMU the cpu model detection runs on the "none" machine this will result in cpu model issues regarding the "ais" capability. To get the "ais" capability properly let's add a new KVM capability that tells userspace that AIS states can be migrated. Signed-off-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Acked-by: Halil Pasic --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index de6a5b790da0..8f4b655f65d7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -395,6 +395,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_USER_INSTR0: case KVM_CAP_S390_CMMA_MIGRATION: case KVM_CAP_S390_AIS: + case KVM_CAP_S390_AIS_MIGRATION: r = 1; break; case KVM_CAP_S390_MEM_OP: -- cgit