diff options
44 files changed, 1931 insertions, 634 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 092ee9fbaf2b..053f613fc9a9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1451,6 +1451,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; __u32 pad[8]; } u; }; @@ -1459,6 +1460,7 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 No flags are specified so far, the corresponding field must be set to zero. @@ -1482,6 +1484,10 @@ struct kvm_irq_routing_s390_adapter { __u32 adapter_id; }; +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated) @@ -3331,6 +3337,28 @@ the userspace IOAPIC should process the EOI and retrigger the interrupt if it is still asserted. Vector is the LAPIC interrupt vector for which the EOI was received. + struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 + __u32 type; + union { + struct { + __u32 msr; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + } u; + }; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; +Indicates that the VCPU exits into userspace to process some tasks +related to Hyper-V emulation. +Valid values for 'type' are: + KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about +Hyper-V SynIC state change. Notification is used to remap SynIC +event/message pages and to enable/disable SynIC messages/events processing +in userspace. + /* Fix the size of the union. */ char padding[256]; }; @@ -3685,3 +3713,16 @@ available, means that that the kernel has an implementation of the H_RANDOM hypercall backed by a hardware random-number generator. If present, the kernel H_RANDOM handler can be enabled for guest use with the KVM_CAP_PPC_ENABLE_HCALL capability. + +8.2 KVM_CAP_HYPERV_SYNIC + +Architectures: x86 +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that that the kernel has an implementation of the +Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is +used to support Windows Hyper-V based guest paravirt drivers(VMBus). + +In order to use SynIC, it has to be activated by setting this +capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this +will disable the use of APIC hardware virtualization even if supported +by the CPU, as it's incompatible with SynIC auto-EOI behavior. diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 2d09d1ed86d0..f083a168eb35 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -37,7 +37,8 @@ Returns: -EFAULT if the given address is not accessible Allows userspace to query the actual limit and set a new limit for the maximum guest memory size. The limit will be rounded up to 2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by -the number of page table levels. +the number of page table levels. In the case that there is no limit we will set +the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX). 2. GROUP: KVM_S390_VM_CPU_MODEL Architectures: s390 diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 3a4d681c3e98..daf9c0f742d2 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -203,10 +203,10 @@ Shadow pages contain the following information: page cannot be destroyed. See role.invalid. parent_ptes: The reverse mapping for the pte/ptes pointing at this page's spt. If - parent_ptes bit 0 is zero, only one spte points at this pages and + parent_ptes bit 0 is zero, only one spte points at this page and parent_ptes points at this single spte, otherwise, there exists multiple sptes pointing at this page and (parent_ptes & ~0x1) points at a data - structure with a list of parent_ptes. + structure with a list of parent sptes. unsync: If true, then the translations in this page may not match the guest's translation. This is equivalent to the state of the tlb when a pte is diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 54b45b73195f..a29da44cdc6c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -308,16 +308,10 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) { - int r; - struct kvm_vcpu *v, *ret = NULL; + struct kvm_vcpu *ret; mutex_lock(&kvm->lock); - kvm_for_each_vcpu(r, v, kvm) { - if (v->vcpu_id == id) { - ret = v; - break; - } - } + ret = kvm_get_vcpu_by_id(kvm, id); mutex_unlock(&kvm->lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 64891b081ad5..70fb08da416d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -512,7 +512,7 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) +static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; @@ -521,7 +521,7 @@ static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) gpa &= ~0xFFFULL; if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { - return 1; + return true; } return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bab6739a1154..08e34a5dc909 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -104,6 +104,9 @@ #define HWCAP_S390_TE 1024 #define HWCAP_S390_VXRS 2048 +/* Internal bits, not exposed via elf */ +#define HWCAP_INT_SIE 1UL + /* * These are used to set parameters in the core dumps. */ @@ -169,6 +172,10 @@ extern unsigned int vdso_enabled; extern unsigned long elf_hwcap; #define ELF_HWCAP (elf_hwcap) +/* Internal hardware capabilities, not exposed via elf */ + +extern unsigned long int_hwcap; + /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index efaac2c3bb77..c83144110ea9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,7 +25,9 @@ #include <asm/fpu/api.h> #include <asm/isc.h> -#define KVM_MAX_VCPUS 64 +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 +#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS #define KVM_USER_MEM_SLOTS 32 /* @@ -40,9 +42,34 @@ #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f -struct sca_entry { +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +} __packed; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +} __packed; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +} __packed; + +struct bsca_entry { __u8 reserved0; - __u8 sigp_ctrl; + union bsca_sigp_ctrl sigp_ctrl; __u16 reserved[3]; __u64 sda; __u64 reserved2[2]; @@ -57,14 +84,22 @@ union ipte_control { }; }; -struct sca_block { +struct bsca_block { union ipte_control ipte_control; __u64 reserved[5]; __u64 mcn; __u64 reserved2; - struct sca_entry cpu[64]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; } __attribute__((packed)); +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[7]; + __u64 mcn[4]; + __u64 reserved2[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +} __packed; + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -585,11 +620,14 @@ struct kvm_s390_crypto_cb { }; struct kvm_arch{ - struct sca_block *sca; + void *sca; + int use_esca; + rwlock_t sca_lock; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; struct gmap *gmap; + unsigned long mem_limit; int css_support; int use_irqchip; int use_cmma; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 821dde5f425d..dea883f85d66 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -29,7 +29,10 @@ struct sclp_ipl_info { struct sclp_core_entry { u8 core_id; - u8 reserved0[2]; + u8 reserved0; + u8 : 4; + u8 sief2 : 1; + u8 : 3; u8 : 3; u8 siif : 1; u8 sigpif : 1; @@ -53,6 +56,9 @@ struct sclp_info { unsigned char has_sigpif : 1; unsigned char has_core_type : 1; unsigned char has_sprp : 1; + unsigned char has_hvs : 1; + unsigned char has_esca : 1; + unsigned char has_sief2 : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index ef1a5fcc6c66..d2aea31252f2 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_MEM_CLR_CMMA 1 #define KVM_S390_VM_MEM_LIMIT_SIZE 2 +#define KVM_S390_NO_MEM_LIMIT U64_MAX + /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 7ce00e7a709a..647128d5b983 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", "highgprs", "te", "vx" }; + static const char * const int_hwcap_str[] = { + "sie" + }; unsigned long n = (unsigned long) v - 1; int i; @@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); show_cacheinfo(m); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c837bcacf218..dc83ae66a730 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq); unsigned long elf_hwcap __read_mostly = 0; char elf_platform[ELF_PLATFORM_SIZE]; +unsigned long int_hwcap = 0; + int __initdata memory_end_set; unsigned long __initdata memory_end; unsigned long __initdata max_physmem_end; @@ -793,6 +795,13 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + + /* + * Virtualization support HWCAP_INT_SIE is bit 0. + */ + if (sclp.has_sief2) + int_hwcap |= HWCAP_INT_SIE; + return 0; } arch_initcall(setup_hwcaps); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5fbfb88f8477..05f7de9869a9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -155,10 +155,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tcpu; int tid; - int i; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; @@ -167,12 +165,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (tid == vcpu->vcpu_id) return 0; - kvm_for_each_vcpu(i, tcpu, kvm) - if (tcpu->vcpu_id == tid) { - kvm_vcpu_yield_to(tcpu); - break; - } - + tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); + if (tcpu) + kvm_vcpu_yield_to(tcpu); return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a7559f7207df..d30db40437dc 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -259,10 +259,14 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + if (vcpu->arch.sie_block->eca & 1) { + int rc; - if (vcpu->arch.sie_block->eca & 1) - return ic->kh != 0; + read_lock(&vcpu->kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; + read_unlock(&vcpu->kvm->arch.sca_lock); + return rc; + } return vcpu->kvm->arch.ipte_lock_count != 0; } @@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.k) { + if (old.k) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } @@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); @@ -311,24 +321,29 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.kg) { + if (old.kg) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; new.kh++; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); } static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; @@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) if (!new.kh) new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); if (!new.kh) wake_up(&vcpu->kvm->arch.ipte_wq); } diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b4a5aa110cec..d53c10753c46 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -54,9 +54,6 @@ void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { - case 0x0: - vcpu->stat.exit_null++; - break; case 0x10: vcpu->stat.exit_external_request++; break; @@ -338,8 +335,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) + return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->icptcode) { - case 0x00: case 0x10: case 0x18: return handle_noop(vcpu); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 6a75352f453c..62ec925aa196 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -34,6 +34,106 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 +/* handle external calls via sigp interpretation facility */ +static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) +{ + int c, scn; + + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 0; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (src_id) + *src_id = scn; + + return c; +} + +static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) +{ + int expect, rc; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (rc != expect) { + /* another external call is pending */ + return -EBUSY; + } + atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + return 0; +} + +static void sca_clear_ext_call(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc, expect; + + atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + WARN_ON(rc != expect); /* cannot clear? */ +} + int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -792,13 +892,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (!sclp.has_sigpif) return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); - return (sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); + return sca_ext_call_pending(vcpu, NULL); } int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) @@ -909,9 +1007,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); - /* clear pending external calls set by sigp interpretation facility */ - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; + sca_clear_ext_call(vcpu); } int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) @@ -1003,21 +1099,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) -{ - unsigned char new_val, old_val; - uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - - new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - old_val = *sigp_ctrl & ~SIGP_CTRL_C; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { - /* another external call is pending */ - return -EBUSY; - } - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); - return 0; -} - static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1034,7 +1115,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return -EINVAL; if (sclp.has_sigpif) - return __inject_extcall_sigpif(vcpu, src_id); + return sca_inject_ext_call(vcpu, src_id); if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; @@ -2203,7 +2284,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) { - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int scn; unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; unsigned long pending_irqs; @@ -2243,14 +2324,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) } } - if ((sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND)) { + if (sca_ext_call_pending(vcpu, &scn)) { if (n + sizeof(irq) > len) return -ENOBUFS; memset(&irq, 0, sizeof(irq)); irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + irq.u.extcall.code = scn; if (copy_to_user(&buf[n], &irq, sizeof(irq))) return -EFAULT; n += sizeof(irq); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 846589281b04..940e9ff231a3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -246,7 +246,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = KVM_MAX_VCPUS; + r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS + : KVM_S390_BSCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; @@ -283,6 +284,8 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, } /* Section: vm related */ +static void sca_del_vcpu(struct kvm_vcpu *vcpu); + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -375,8 +378,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", - kvm->arch.gmap->asce_end); - if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) + kvm->arch.mem_limit); + if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) ret = -EFAULT; break; default: @@ -428,9 +431,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT; - if (new_limit > kvm->arch.gmap->asce_end) + if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && + new_limit > kvm->arch.mem_limit) return -E2BIG; + if (!new_limit) + return -EINVAL; + + /* gmap_alloc takes last usable address */ + if (new_limit != KVM_S390_NO_MEM_LIMIT) + new_limit -= 1; + ret = -EBUSY; mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { @@ -447,7 +458,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); - VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); + VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); + VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + (void *) kvm->arch.gmap->asce); break; } default: @@ -1024,7 +1037,7 @@ static int kvm_s390_apxa_installed(void) u8 config[128]; int cc; - if (test_facility(2) && test_facility(12)) { + if (test_facility(12)) { cc = kvm_s390_query_ap_config(config); if (cc) @@ -1075,6 +1088,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm) return 0; } +static void sca_dispose(struct kvm *kvm) +{ + if (kvm->arch.use_esca) + free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); + else + free_page((unsigned long)(kvm->arch.sca)); + kvm->arch.sca = NULL; +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int i, rc; @@ -1098,14 +1120,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; - kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.use_esca = 0; /* start with basic SCA */ + rwlock_init(&kvm->arch.sca_lock); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); sca_offset += 16; - if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; - kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -1157,8 +1182,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; + kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); + if (sclp.hamax == U64_MAX) + kvm->arch.mem_limit = TASK_MAX_SIZE; + else + kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + sclp.hamax + 1); + kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; kvm->arch.gmap->private = kvm; @@ -1170,14 +1201,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); - KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; out_err: kfree(kvm->arch.crypto.crycb); free_page((unsigned long)kvm->arch.model.fac); debug_unregister(kvm->arch.dbf); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1188,13 +1219,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_is_ucontrol(vcpu->kvm)) { - clear_bit(63 - vcpu->vcpu_id, - (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; - } + if (!kvm_is_ucontrol(vcpu->kvm)) + sca_del_vcpu(vcpu); smp_mb(); if (kvm_is_ucontrol(vcpu->kvm)) @@ -1228,14 +1254,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); free_page((unsigned long)kvm->arch.model.fac); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); debug_unregister(kvm->arch.dbf); kfree(kvm->arch.crypto.crycb); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); - KVM_EVENT(3, "vm 0x%p destroyed", kvm); + KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } /* Section: vcpu related */ @@ -1249,6 +1275,117 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +static void sca_del_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +static void sca_add_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + vcpu->arch.sie_block->ecb2 |= 0x04U; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +/* Basic SCA to Extended SCA data copy routines */ +static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) +{ + d->sda = s->sda; + d->sigp_ctrl.c = s->sigp_ctrl.c; + d->sigp_ctrl.scn = s->sigp_ctrl.scn; +} + +static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) +{ + int i; + + d->ipte_control = s->ipte_control; + d->mcn[0] = s->mcn; + for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) + sca_copy_entry(&d->cpu[i], &s->cpu[i]); +} + +static int sca_switch_to_extended(struct kvm *kvm) +{ + struct bsca_block *old_sca = kvm->arch.sca; + struct esca_block *new_sca; + struct kvm_vcpu *vcpu; + unsigned int vcpu_idx; + u32 scaol, scaoh; + + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + if (!new_sca) + return -ENOMEM; + + scaoh = (u32)((u64)(new_sca) >> 32); + scaol = (u32)(u64)(new_sca) & ~0x3fU; + + kvm_s390_vcpu_block_all(kvm); + write_lock(&kvm->arch.sca_lock); + + sca_copy_b_to_e(new_sca, old_sca); + + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { + vcpu->arch.sie_block->scaoh = scaoh; + vcpu->arch.sie_block->scaol = scaol; + vcpu->arch.sie_block->ecb2 |= 0x04U; + } + kvm->arch.sca = new_sca; + kvm->arch.use_esca = 1; + + write_unlock(&kvm->arch.sca_lock); + kvm_s390_vcpu_unblock_all(kvm); + + free_page((unsigned long)old_sca); + + VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + old_sca, kvm->arch.sca); + return 0; +} + +static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) +{ + int rc; + + if (id < KVM_S390_BSCA_CPU_SLOTS) + return true; + if (!sclp.has_esca) + return false; + + mutex_lock(&kvm->lock); + rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); + mutex_unlock(&kvm->lock); + + return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1369,8 +1506,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); - if (!kvm_is_ucontrol(vcpu->kvm)) + if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; + sca_add_vcpu(vcpu); + } + } static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) @@ -1465,7 +1605,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (id >= KVM_MAX_VCPUS) + if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; @@ -1482,20 +1622,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) { - if (!kvm->arch.sca) { - WARN_ON_ONCE(1); - goto out_free_cpu; - } - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = - (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = - (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); - } - spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; @@ -1509,15 +1635,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, */ vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) { - rc = -ENOMEM; + if (!vcpu->arch.guest_fpregs.fprs) goto out_free_sie_block; - } rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); @@ -2013,7 +2137,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) */ kvm_check_async_pf_completion(vcpu); - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; + vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; if (need_resched()) schedule(); @@ -2071,8 +2196,6 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc = -1; - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2080,40 +2203,36 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - if (exit_reason >= 0) { - rc = 0; + vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; + vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; + + if (vcpu->arch.sie_block->icptcode > 0) { + int rc = kvm_handle_sie_intercept(vcpu); + + if (rc != -EOPNOTSUPP) + return rc; + vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; + vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + return -EREMOTE; + } else if (exit_reason != -EFAULT) { + vcpu->stat.exit_null++; + return 0; } else if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; vcpu->run->s390_ucontrol.trans_exc_code = current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; - rc = -EREMOTE; - + return -EREMOTE; } else if (current->thread.gmap_pfault) { trace_kvm_s390_major_guest_pfault(vcpu); current->thread.gmap_pfault = 0; - if (kvm_arch_setup_async_pf(vcpu)) { - rc = 0; - } else { - gpa_t gpa = current->thread.gmap_addr; - rc = kvm_arch_fault_in_page(vcpu, gpa, 1); - } - } - - if (rc == -1) - rc = vcpu_post_run_fault_in_sie(vcpu); - - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); - - if (rc == 0) { - if (kvm_is_ucontrol(vcpu->kvm)) - /* Don't exit for host interrupts. */ - rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; - else - rc = kvm_handle_sie_intercept(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } - - return rc; + return vcpu_post_run_fault_in_sie(vcpu); } static int __vcpu_run(struct kvm_vcpu *vcpu) @@ -2233,18 +2352,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - if (rc == -EOPNOTSUPP) { - /* intercept cannot be handled in-kernel, prepare kvm-run */ - kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; - kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; - kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; - rc = 0; - } - if (rc == -EREMOTE) { - /* intercept was handled, but userspace support is needed - * kvm_run has been prepared by the handler */ + /* userspace support is needed, kvm_run has been prepared */ rc = 0; } @@ -2736,6 +2845,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; + if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) + return -EINVAL; + return 0; } @@ -2767,6 +2879,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, static int __init kvm_s390_init(void) { + if (!sclp.has_sief2) { + pr_info("SIE not available\n"); + return -ENODEV; + } + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1e70e00d3c5e..df1abada1f36 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -340,4 +340,11 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); +/* support for Basic/Extended SCA handling */ +static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) +{ + struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ + + return &sca->ipte_control; +} #endif diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index cc1d6c68356f..396485bca191 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at %p, sie block at %p", __entry->id, - __entry->vcpu, __entry->sie_block) + TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + __entry->id, __entry->vcpu, __entry->sie_block) ); TRACE_EVENT(kvm_s390_destroy_vcpu, @@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %p)\n", + TP_printk("enabling channel I/O support (kvm @ %pK)\n", __entry->kvm) ); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 54ef3bc01b43..63b039899a5e 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -133,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) /** * gmap_alloc - allocate a guest address space * @mm: pointer to the parent mm_struct - * @limit: maximum size of the gmap address space + * @limit: maximum address of the gmap address space * * Returns a guest address space structure. */ @@ -402,7 +402,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 30cfd64295a0..a7c89876698b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include <linux/pvclock_gtod.h> #include <linux/clocksource.h> #include <linux/irqbypass.h> +#include <linux/hyperv.h> #include <asm/pvclock-abi.h> #include <asm/desc.h> @@ -213,6 +214,10 @@ union kvm_mmu_page_role { }; }; +struct kvm_rmap_head { + unsigned long val; +}; + struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; @@ -230,7 +235,7 @@ struct kvm_mmu_page { bool unsync; int root_count; /* Currently serving as active root */ unsigned int unsync_children; - unsigned long parent_ptes; /* Reverse mapping for parent_pte */ + struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ unsigned long mmu_valid_gen; @@ -374,10 +379,38 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V SynIC timer */ +struct kvm_vcpu_hv_stimer { + struct hrtimer timer; + int index; + u64 config; + u64 count; + u64 exp_time; + struct hv_message msg; + bool msg_pending; +}; + +/* Hyper-V synthetic interrupt controller (SynIC)*/ +struct kvm_vcpu_hv_synic { + u64 version; + u64 control; + u64 msg_page; + u64 evt_page; + atomic64_t sint[HV_SYNIC_SINT_COUNT]; + atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; + DECLARE_BITMAP(auto_eoi_bitmap, 256); + DECLARE_BITMAP(vec_bitmap, 256); + bool active; +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { u64 hv_vapic; s64 runtime_offset; + struct kvm_vcpu_hv_synic synic; + struct kvm_hyperv_exit exit; + struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; + DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); }; struct kvm_vcpu_arch { @@ -400,7 +433,8 @@ struct kvm_vcpu_arch { u64 efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ - u64 eoi_exit_bitmap[4]; + bool apicv_active; + DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; @@ -589,7 +623,7 @@ struct kvm_lpage_info { }; struct kvm_arch_memory_slot { - unsigned long *rmap[KVM_NR_PAGE_SIZES]; + struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; @@ -831,10 +865,11 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); + bool (*get_enable_apicv)(void); + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); - void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu); + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); @@ -1086,6 +1121,8 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); +void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 040d4083c24f..7956412d09bd 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -269,4 +269,96 @@ typedef struct _HV_REFERENCE_TSC_PAGE { #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) +#define HV_SYNIC_STIMER_COUNT (4) + +/* Define synthetic interrupt controller message constants. */ +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) + +/* Define hypervisor message types. */ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. */ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Platform-specific processor intercept messages. */ + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +}; + +/* Define synthetic interrupt controller message flags. */ +union hv_message_flags { + __u8 asu8; + struct { + __u8 msg_pending:1; + __u8 reserved:7; + }; +}; + +/* Define port identifier type. */ +union hv_port_id { + __u32 asu32; + struct { + __u32 id:24; + __u32 reserved:8; + } u; +}; + +/* Define synthetic interrupt controller message header. */ +struct hv_message_header { + __u32 message_type; + __u8 payload_size; + union hv_message_flags message_flags; + __u8 reserved[2]; + union { + __u64 sender; + union hv_port_id port; + }; +}; + +/* Define synthetic interrupt controller message format. */ +struct hv_message { + struct hv_message_header header; + union { + __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u; +}; + +/* Define the synthetic interrupt message page layout. */ +struct hv_message_page { + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; +}; + +/* Define timer message payload structure. */ +struct hv_timer_message_payload { + __u32 timer_index; + __u32 reserved; + __u64 expiration_time; /* When the timer expired */ + __u64 delivery_time; /* When the message was delivered */ +}; + +#define HV_STIMER_ENABLE (1ULL << 0) +#define HV_STIMER_PERIODIC (1ULL << 1) +#define HV_STIMER_LAZY (1ULL << 2) +#define HV_STIMER_AUTOENABLE (1ULL << 3) +#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) + #endif diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 62cf8c915e95..f34f666778b2 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,13 +23,645 @@ #include "x86.h" #include "lapic.h" +#include "ioapic.h" #include "hyperv.h" #include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <asm/apicdef.h> #include <trace/events/kvm.h> #include "trace.h" +static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) +{ + return atomic64_read(&synic->sint[sint]); +} + +static inline int synic_get_sint_vector(u64 sint_value) +{ + if (sint_value & HV_SYNIC_SINT_MASKED) + return -1; + return sint_value & HV_SYNIC_SINT_VECTOR_MASK; +} + +static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) + return true; + } + return false; +} + +static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + int i; + u64 sint_value; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + sint_value = synic_read_sint(synic, i); + if (synic_get_sint_vector(sint_value) == vector && + sint_value & HV_SYNIC_SINT_AUTO_EOI) + return true; + } + return false; +} + +static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, u64 data) +{ + int vector; + + vector = data & HV_SYNIC_SINT_VECTOR_MASK; + if (vector < 16) + return 1; + /* + * Guest may configure multiple SINTs to use the same vector, so + * we maintain a bitmap of vectors handled by synic, and a + * bitmap of vectors with auto-eoi behavior. The bitmaps are + * updated here, and atomically queried on fast paths. + */ + + atomic64_set(&synic->sint[sint], data); + + if (synic_has_vector_connected(synic, vector)) + __set_bit(vector, synic->vec_bitmap); + else + __clear_bit(vector, synic->vec_bitmap); + + if (synic_has_vector_auto_eoi(synic, vector)) + __set_bit(vector, synic->auto_eoi_bitmap); + else + __clear_bit(vector, synic->auto_eoi_bitmap); + + /* Load SynIC vectors into EOI exit bitmap */ + kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); + return 0; +} + +static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) +{ + struct kvm_vcpu *vcpu; + struct kvm_vcpu_hv_synic *synic; + + if (vcpu_id >= atomic_read(&kvm->online_vcpus)) + return NULL; + vcpu = kvm_get_vcpu(kvm, vcpu_id); + if (!vcpu) + return NULL; + synic = vcpu_to_synic(vcpu); + return (synic->active) ? synic : NULL; +} + +static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic, + u32 sint) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct page *page; + gpa_t gpa; + struct hv_message *msg; + struct hv_message_page *msg_page; + + gpa = synic->msg_page & PAGE_MASK; + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) { + vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n", + gpa); + return; + } + msg_page = kmap_atomic(page); + + msg = &msg_page->sint_message[sint]; + msg->header.message_flags.msg_pending = 0; + + kunmap_atomic(msg_page); + kvm_release_page_dirty(page); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); +} + +static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + struct kvm_vcpu_hv_stimer *stimer; + int gsi, idx, stimers_pending; + + vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); + + if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) + synic_clear_sint_msg_pending(synic, sint); + + /* Try to deliver pending Hyper-V SynIC timers messages */ + stimers_pending = 0; + for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { + stimer = &hv_vcpu->stimer[idx]; + if (stimer->msg_pending && + (stimer->config & HV_STIMER_ENABLE) && + HV_STIMER_SINT(stimer->config) == sint) { + set_bit(stimer->index, + hv_vcpu->stimer_pending_bitmap); + stimers_pending++; + } + } + if (stimers_pending) + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = atomic_read(&synic->sint_to_gsi[sint]); + if (gsi != -1) + kvm_notify_acked_gsi(kvm, gsi); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; + + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; + hv_vcpu->exit.u.synic.msr = msr; + hv_vcpu->exit.u.synic.control = synic->control; + hv_vcpu->exit.u.synic.evt_page = synic->evt_page; + hv_vcpu->exit.u.synic.msg_page = synic->msg_page; + + kvm_make_request(KVM_REQ_HV_EXIT, vcpu); +} + +static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, + u32 msr, u64 data, bool host) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + int ret; + + if (!synic->active) + return 1; + + vcpu_debug(vcpu, "Hyper-V SynIC set msr 0x%x 0x%llx host %d\n", + msr, data, host); + ret = 0; + switch (msr) { + case HV_X64_MSR_SCONTROL: + synic->control = data; + if (!host) + synic_exit(synic, msr); + break; + case HV_X64_MSR_SVERSION: + if (!host) { + ret = 1; + break; + } + synic->version = data; + break; + case HV_X64_MSR_SIEFP: + if (data & HV_SYNIC_SIEFP_ENABLE) + if (kvm_clear_guest(vcpu->kvm, + data & PAGE_MASK, PAGE_SIZE)) { + ret = 1; + break; + } + synic->evt_page = data; + if (!host) + synic_exit(synic, msr); + break; + case HV_X64_MSR_SIMP: + if (data & HV_SYNIC_SIMP_ENABLE) + if (kvm_clear_guest(vcpu->kvm, + data & PAGE_MASK, PAGE_SIZE)) { + ret = 1; + break; + } + synic->msg_page = data; + if (!host) + synic_exit(synic, msr); + break; + case HV_X64_MSR_EOM: { + int i; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) + kvm_hv_notify_acked_sint(vcpu, i); + break; + } + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data); + break; + default: + ret = 1; + break; + } + return ret; +} + +static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata) +{ + int ret; + + if (!synic->active) + return 1; + + ret = 0; + switch (msr) { + case HV_X64_MSR_SCONTROL: + *pdata = synic->control; + break; + case HV_X64_MSR_SVERSION: + *pdata = synic->version; + break; + case HV_X64_MSR_SIEFP: + *pdata = synic->evt_page; + break; + case HV_X64_MSR_SIMP: + *pdata = synic->msg_page; + break; + case HV_X64_MSR_EOM: + *pdata = 0; + break; + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); + break; + default: + ret = 1; + break; + } + return ret; +} + +int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct kvm_lapic_irq irq; + int ret, vector; + + if (sint >= ARRAY_SIZE(synic->sint)) + return -EINVAL; + + vector = synic_get_sint_vector(synic_read_sint(synic, sint)); + if (vector < 0) + return -ENOENT; + + memset(&irq, 0, sizeof(irq)); + irq.dest_id = kvm_apic_id(vcpu->arch.apic); + irq.dest_mode = APIC_DEST_PHYSICAL; + irq.delivery_mode = APIC_DM_FIXED; + irq.vector = vector; + irq.level = 1; + + ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); + vcpu_debug(vcpu, "Hyper-V SynIC set irq ret %d\n", ret); + return ret; +} + +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint) +{ + struct kvm_vcpu_hv_synic *synic; + + synic = synic_get(kvm, vcpu_id); + if (!synic) + return -EINVAL; + + return synic_set_irq(synic, sint); +} + +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + int i; + + vcpu_debug(vcpu, "Hyper-V SynIC send eoi vec %d\n", vector); + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) + kvm_hv_notify_acked_sint(vcpu, i); +} + +static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi) +{ + struct kvm_vcpu_hv_synic *synic; + + synic = synic_get(kvm, vcpu_id); + if (!synic) + return -EINVAL; + + if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) + return -EINVAL; + + atomic_set(&synic->sint_to_gsi[sint], gsi); + return 0; +} + +void kvm_hv_irq_routing_update(struct kvm *kvm) +{ + struct kvm_irq_routing_table *irq_rt; + struct kvm_kernel_irq_routing_entry *e; + u32 gsi; + + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); + + for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + if (e->type == KVM_IRQ_ROUTING_HV_SINT) + kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, + e->hv_sint.sint, gsi); + } + } +} + +static void synic_init(struct kvm_vcpu_hv_synic *synic) +{ + int i; + + memset(synic, 0, sizeof(*synic)); + synic->version = HV_SYNIC_VERSION_1; + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); + atomic_set(&synic->sint_to_gsi[i], -1); + } +} + +static u64 get_time_ref_counter(struct kvm *kvm) +{ + return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); +} + +static void stimer_mark_expired(struct kvm_vcpu_hv_stimer *stimer, + bool vcpu_kick) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + + set_bit(stimer->index, + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); + if (vcpu_kick) + kvm_vcpu_kick(vcpu); +} + +static void stimer_stop(struct kvm_vcpu_hv_stimer *stimer) +{ + hrtimer_cancel(&stimer->timer); +} + +static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + + stimer_stop(stimer); + clear_bit(stimer->index, + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); + stimer->msg_pending = false; +} + +static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) +{ + struct kvm_vcpu_hv_stimer *stimer; + + stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); + stimer_mark_expired(stimer, true); + + return HRTIMER_NORESTART; +} + +static void stimer_restart(struct kvm_vcpu_hv_stimer *stimer) +{ + u64 time_now; + ktime_t ktime_now; + u64 remainder; + + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); + ktime_now = ktime_get(); + + div64_u64_rem(time_now - stimer->exp_time, stimer->count, &remainder); + stimer->exp_time = time_now + (stimer->count - remainder); + + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, + 100 * (stimer->exp_time - time_now)), + HRTIMER_MODE_ABS); +} + +static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) +{ + u64 time_now; + ktime_t ktime_now; + + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); + ktime_now = ktime_get(); + + if (stimer->config & HV_STIMER_PERIODIC) { + if (stimer->count == 0) + return -EINVAL; + + stimer->exp_time = time_now + stimer->count; + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, 100 * stimer->count), + HRTIMER_MODE_ABS); + return 0; + } + stimer->exp_time = stimer->count; + if (time_now >= stimer->count) { + /* + * Expire timer according to Hypervisor Top-Level Functional + * specification v4(15.3.1): + * "If a one shot is enabled and the specified count is in + * the past, it will expire immediately." + */ + stimer_mark_expired(stimer, false); + return 0; + } + + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), + HRTIMER_MODE_ABS); + return 0; +} + +static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, + bool host) +{ + if (stimer->count == 0 || HV_STIMER_SINT(config) == 0) + config &= ~HV_STIMER_ENABLE; + stimer->config = config; + stimer_cleanup(stimer); + if (stimer->config & HV_STIMER_ENABLE) + if (stimer_start(stimer)) + return 1; + return 0; +} + +static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, + bool host) +{ + stimer->count = count; + + stimer_cleanup(stimer); + if (stimer->count == 0) + stimer->config &= ~HV_STIMER_ENABLE; + else if (stimer->config & HV_STIMER_AUTOENABLE) { + stimer->config |= HV_STIMER_ENABLE; + if (stimer_start(stimer)) + return 1; + } + + return 0; +} + +static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) +{ + *pconfig = stimer->config; + return 0; +} + +static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) +{ + *pcount = stimer->count; + return 0; +} + +static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, + struct hv_message *src_msg) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct page *page; + gpa_t gpa; + struct hv_message *dst_msg; + int r; + struct hv_message_page *msg_page; + + if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE)) + return -ENOENT; + + gpa = synic->msg_page & PAGE_MASK; + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) + return -EFAULT; + + msg_page = kmap_atomic(page); + dst_msg = &msg_page->sint_message[sint]; + if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE, + src_msg->header.message_type) != HVMSG_NONE) { + dst_msg->header.message_flags.msg_pending = 1; + r = -EAGAIN; + } else { + memcpy(&dst_msg->u.payload, &src_msg->u.payload, + src_msg->header.payload_size); + dst_msg->header.message_type = src_msg->header.message_type; + dst_msg->header.payload_size = src_msg->header.payload_size; + r = synic_set_irq(synic, sint); + if (r >= 1) + r = 0; + else if (r == 0) + r = -EFAULT; + } + kunmap_atomic(msg_page); + kvm_release_page_dirty(page); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); + return r; +} + +static void stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + struct hv_message *msg = &stimer->msg; + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)&msg->u.payload; + int r; + + stimer->msg_pending = true; + payload->expiration_time = stimer->exp_time; + payload->delivery_time = get_time_ref_counter(vcpu->kvm); + r = synic_deliver_msg(vcpu_to_synic(vcpu), + HV_STIMER_SINT(stimer->config), msg); + if (!r) + stimer->msg_pending = false; +} + +static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) +{ + stimer_send_msg(stimer); + if (!(stimer->config & HV_STIMER_PERIODIC)) + stimer->config |= ~HV_STIMER_ENABLE; + else + stimer_restart(stimer); +} + +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + struct kvm_vcpu_hv_stimer *stimer; + u64 time_now; + int i; + + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { + stimer = &hv_vcpu->stimer[i]; + if (stimer->config & HV_STIMER_ENABLE) { + time_now = get_time_ref_counter(vcpu->kvm); + if (time_now >= stimer->exp_time) + stimer_expiration(stimer); + } + } +} + +void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + int i; + + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + stimer_cleanup(&hv_vcpu->stimer[i]); +} + +static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) +{ + struct hv_message *msg = &stimer->msg; + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)&msg->u.payload; + + memset(&msg->header, 0, sizeof(msg->header)); + msg->header.message_type = HVMSG_TIMER_EXPIRED; + msg->header.payload_size = sizeof(*payload); + + payload->timer_index = stimer->index; + payload->expiration_time = 0; + payload->delivery_time = 0; +} + +static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) +{ + memset(stimer, 0, sizeof(*stimer)); + stimer->index = timer_index; + hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + stimer->timer.function = stimer_timer_callback; + stimer_prepare_msg(stimer); +} + +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + int i; + + synic_init(&hv_vcpu->synic); + + bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + stimer_init(&hv_vcpu->stimer[i], i); +} + +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) +{ + /* + * Hyper-V SynIC auto EOI SINT's are + * not compatible with APICV, so deactivate APICV + */ + kvm_vcpu_deactivate_apicv(vcpu); + vcpu_to_synic(vcpu)->active = true; + return 0; +} + static bool kvm_hv_msr_partition_wide(u32 msr) { bool r = false; @@ -226,6 +858,31 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; hv->runtime_offset = data - current_task_runtime_100ns(); break; + case HV_X64_MSR_SCONTROL: + case HV_X64_MSR_SVERSION: + case HV_X64_MSR_SIEFP: + case HV_X64_MSR_SIMP: + case HV_X64_MSR_EOM: + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: { + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; + + return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), + data, host); + } + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: { + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; + + return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), + data, host); + } default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -248,11 +905,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_HYPERCALL: data = hv->hv_hypercall; break; - case HV_X64_MSR_TIME_REF_COUNT: { - data = - div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); + case HV_X64_MSR_TIME_REF_COUNT: + data = get_time_ref_counter(kvm); break; - } case HV_X64_MSR_REFERENCE_TSC: data = hv->hv_tsc_page; break; @@ -304,6 +959,31 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_VP_RUNTIME: data = current_task_runtime_100ns() + hv->runtime_offset; break; + case HV_X64_MSR_SCONTROL: + case HV_X64_MSR_SVERSION: + case HV_X64_MSR_SIEFP: + case HV_X64_MSR_SIMP: + case HV_X64_MSR_EOM: + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: { + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; + + return stimer_get_config(vcpu_to_stimer(vcpu, timer_index), + pdata); + } + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: { + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; + + return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), + pdata); + } default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index c7bce559f67b..60eccd4bd1d3 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -24,9 +24,64 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define __ARCH_X86_KVM_HYPERV_H__ +static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.hyperv; +} + +static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu) +{ + struct kvm_vcpu_arch *arch; + + arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv); + return container_of(arch, struct kvm_vcpu, arch); +} + +static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.hyperv.synic; +} + +static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) +{ + return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic)); +} + int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); + bool kvm_hv_hypercall_enabled(struct kvm *kvm); int kvm_hv_hypercall(struct kvm_vcpu *vcpu); +void kvm_hv_irq_routing_update(struct kvm *kvm); +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); + +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); + +static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu, + int timer_index) +{ + return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index]; +} + +static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu_hv *hv_vcpu; + + hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv, + stimer[0]); + return hv_vcpu_to_vcpu(hv_vcpu); +} + +static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) +{ + return !bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap, + HV_SYNIC_STIMER_COUNT); +} + +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 88d0a92d3f94..1facfd60b04a 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -233,7 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) } -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; union kvm_ioapic_redirect_entry *e; @@ -250,7 +250,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) (e->fields.trig_mode == IOAPIC_EDGE_TRIG && kvm_apic_pending_eoi(vcpu, e->fields.vector))) __set_bit(e->fields.vector, - (unsigned long *)eoi_exit_bitmap); + ioapic_handled_vectors); } } spin_unlock(&ioapic->lock); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 084617d37c74..2d16dc251d81 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -121,7 +121,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); -void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); - +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors); +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors); #endif diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 097060e33bd6..3982b479bb5f 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -76,7 +76,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (kvm_cpu_has_extint(v)) return 1; - if (kvm_vcpu_apic_vid_enabled(v)) + if (kvm_vcpu_apicv_active(v)) return 0; return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 84b96d319909..8fc89efb5250 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -33,6 +33,8 @@ #include "lapic.h" +#include "hyperv.h" + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -219,6 +221,16 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { @@ -257,6 +269,11 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; break; + case KVM_IRQ_ROUTING_HV_SINT: + e->set = kvm_hv_set_sint; + e->hv_sint.vcpu = ue->u.hv_sint.vcpu; + e->hv_sint.sint = ue->u.hv_sint.sint; + break; default: goto out; } @@ -332,14 +349,15 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm) return kvm_set_irq_routing(kvm, empty_routing, 0, 0); } -void kvm_arch_irq_routing_update(struct kvm *kvm) +void kvm_arch_post_irq_routing_update(struct kvm *kvm) { if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) return; kvm_make_scan_ioapic_request(kvm); } -void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors) { struct kvm *kvm = vcpu->kvm; struct kvm_kernel_irq_routing_entry *entry; @@ -369,9 +387,26 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) u32 vector = entry->msi.data & 0xff; __set_bit(vector, - (unsigned long *) eoi_exit_bitmap); + ioapic_handled_vectors); } } } srcu_read_unlock(&kvm->irq_srcu, idx); } + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + switch (irq->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(irq, kvm, irq_source_id, level, + line_status); + default: + return -EWOULDBLOCK; + } +} + +void kvm_arch_irq_routing_update(struct kvm *kvm) +{ + kvm_hv_irq_routing_update(kvm); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4d30b865be30..36591faed13b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -41,6 +41,7 @@ #include "trace.h" #include "x86.h" #include "cpuid.h" +#include "hyperv.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -128,11 +129,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -static inline int kvm_apic_id(struct kvm_lapic *apic) -{ - return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; -} - /* The logical map is definitely wrong if we have multiple * modes at the same time. (Physical map is always right.) */ @@ -379,7 +375,8 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; - kvm_x86_ops->sync_pir_to_irr(apic->vcpu); + if (apic->vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -392,7 +389,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; - if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) { + if (unlikely(vcpu->arch.apicv_active)) { /* try to update RVI */ apic_clear_vector(vec, apic->regs + APIC_IRR); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -418,7 +415,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) * because the processor can modify ISR under the hood. Instead * just set SVI. */ - if (unlikely(kvm_x86_ops->hwapic_isr_update)) + if (unlikely(vcpu->arch.apicv_active)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); else { ++apic->isr_count; @@ -466,7 +463,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * on the other hand isr_count and highest_isr_cache are unused * and must be left alone. */ - if (unlikely(kvm_x86_ops->hwapic_isr_update)) + if (unlikely(vcpu->arch.apicv_active)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); else { @@ -852,7 +849,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (kvm_x86_ops->deliver_posted_interrupt) + if (vcpu->arch.apicv_active) kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); else { apic_set_irr(vector, apic); @@ -932,7 +929,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) { - return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap); + return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); } static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) @@ -974,6 +971,9 @@ static int apic_set_eoi(struct kvm_lapic *apic) apic_clear_isr(vector, apic); apic_update_ppr(apic); + if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap)) + kvm_hv_synic_send_eoi(apic->vcpu, vector); + kvm_ioapic_send_eoi(apic, vector); kvm_make_request(KVM_REQ_EVENT, apic->vcpu); return vector; @@ -1225,7 +1225,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) int vec = reg & APIC_VECTOR_MASK; void *bitmap = apic->regs + APIC_ISR; - if (kvm_x86_ops->deliver_posted_interrupt) + if (vcpu->arch.apicv_active) bitmap = apic->regs + APIC_IRR; if (apic_test_vector(vec, bitmap)) @@ -1693,8 +1693,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } - apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu); - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; + apic->irr_pending = vcpu->arch.apicv_active; + apic->isr_count = vcpu->arch.apicv_active ? 1 : 0; apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1883,6 +1883,12 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) apic_set_isr(vector, apic); apic_update_ppr(apic); apic_clear_irr(vector, apic); + + if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { + apic_clear_isr(vector, apic); + apic_update_ppr(apic); + } + return vector; } @@ -1906,15 +1912,15 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? + apic->isr_count = vcpu->arch.apicv_active ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; - if (kvm_x86_ops->hwapic_irr_update) + if (vcpu->arch.apicv_active) { kvm_x86_ops->hwapic_irr_update(vcpu, apic_find_highest_irr(apic)); - if (unlikely(kvm_x86_ops->hwapic_isr_update)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); + } kvm_make_request(KVM_REQ_EVENT, vcpu); if (ioapic_in_kernel(vcpu->kvm)) kvm_rtc_eoi_tracking_restore_one(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index fde8e35d5850..41bdb35b4b67 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -143,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } -static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) { - return kvm_x86_ops->cpu_uses_apicv(vcpu); + return vcpu->arch.apic && vcpu->arch.apicv_active; } static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) @@ -164,6 +164,11 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } +static inline int kvm_apic_id(struct kvm_lapic *apic) +{ + return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; +} + bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e7c2c1428a69..7f5a82bb61e9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -311,11 +311,6 @@ static int is_large_pte(u64 pte) return pte & PT_PAGE_SIZE_MASK; } -static int is_rmap_spte(u64 pte) -{ - return is_shadow_present_pte(pte); -} - static int is_last_spte(u64 pte, int level) { if (level == PT_PAGE_TABLE_LEVEL) @@ -540,7 +535,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) u64 old_spte = *sptep; bool ret = false; - WARN_ON(!is_rmap_spte(new_spte)); + WARN_ON(!is_shadow_present_pte(new_spte)); if (!is_shadow_present_pte(old_spte)) { mmu_spte_set(sptep, new_spte); @@ -595,7 +590,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) else old_spte = __update_clear_spte_slow(sptep, 0ull); - if (!is_rmap_spte(old_spte)) + if (!is_shadow_present_pte(old_spte)) return 0; pfn = spte_to_pfn(old_spte); @@ -909,36 +904,35 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, } /* - * Pte mapping structures: + * About rmap_head encoding: * - * If pte_list bit zero is zero, then pte_list point to the spte. - * - * If pte_list bit zero is one, (then pte_list & ~1) points to a struct + * If the bit zero of rmap_head->val is clear, then it points to the only spte + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct * pte_list_desc containing more mappings. - * - * Returns the number of pte entries before the spte was added or zero if - * the spte was not added. - * + */ + +/* + * Returns the number of pointers in the rmap chain, not counting the new one. */ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, - unsigned long *pte_list) + struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; int i, count = 0; - if (!*pte_list) { + if (!rmap_head->val) { rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); - *pte_list = (unsigned long)spte; - } else if (!(*pte_list & 1)) { + rmap_head->val = (unsigned long)spte; + } else if (!(rmap_head->val & 1)) { rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); desc = mmu_alloc_pte_list_desc(vcpu); - desc->sptes[0] = (u64 *)*pte_list; + desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; - *pte_list = (unsigned long)desc | 1; + rmap_head->val = (unsigned long)desc | 1; ++count; } else { rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { desc = desc->more; count += PTE_LIST_EXT; @@ -955,8 +949,9 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, } static void -pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, - int i, struct pte_list_desc *prev_desc) +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, + struct pte_list_desc *desc, int i, + struct pte_list_desc *prev_desc) { int j; @@ -967,43 +962,43 @@ pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, if (j != 0) return; if (!prev_desc && !desc->more) - *pte_list = (unsigned long)desc->sptes[0]; + rmap_head->val = (unsigned long)desc->sptes[0]; else if (prev_desc) prev_desc->more = desc->more; else - *pte_list = (unsigned long)desc->more | 1; + rmap_head->val = (unsigned long)desc->more | 1; mmu_free_pte_list_desc(desc); } -static void pte_list_remove(u64 *spte, unsigned long *pte_list) +static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; struct pte_list_desc *prev_desc; int i; - if (!*pte_list) { + if (!rmap_head->val) { printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte); BUG(); - } else if (!(*pte_list & 1)) { + } else if (!(rmap_head->val & 1)) { rmap_printk("pte_list_remove: %p 1->0\n", spte); - if ((u64 *)*pte_list != spte) { + if ((u64 *)rmap_head->val != spte) { printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte); BUG(); } - *pte_list = 0; + rmap_head->val = 0; } else { rmap_printk("pte_list_remove: %p many->many\n", spte); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); prev_desc = NULL; while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) + for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { if (desc->sptes[i] == spte) { - pte_list_desc_remove_entry(pte_list, - desc, i, - prev_desc); + pte_list_desc_remove_entry(rmap_head, + desc, i, prev_desc); return; } + } prev_desc = desc; desc = desc->more; } @@ -1012,28 +1007,8 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list) } } -typedef void (*pte_list_walk_fn) (u64 *spte); -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) -{ - struct pte_list_desc *desc; - int i; - - if (!*pte_list) - return; - - if (!(*pte_list & 1)) - return fn((u64 *)*pte_list); - - desc = (struct pte_list_desc *)(*pte_list & ~1ul); - while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) - fn(desc->sptes[i]); - desc = desc->more; - } -} - -static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, - struct kvm_memory_slot *slot) +static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, + struct kvm_memory_slot *slot) { unsigned long idx; @@ -1041,10 +1016,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; } -/* - * Take gfn and return the reverse mapping to it. - */ -static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp) +static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, + struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; @@ -1065,24 +1038,24 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu) static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { struct kvm_mmu_page *sp; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; sp = page_header(__pa(spte)); kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); - return pte_list_add(vcpu, spte, rmapp); + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + return pte_list_add(vcpu, spte, rmap_head); } static void rmap_remove(struct kvm *kvm, u64 *spte) { struct kvm_mmu_page *sp; gfn_t gfn; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; sp = page_header(__pa(spte)); gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); - rmapp = gfn_to_rmap(kvm, gfn, sp); - pte_list_remove(spte, rmapp); + rmap_head = gfn_to_rmap(kvm, gfn, sp); + pte_list_remove(spte, rmap_head); } /* @@ -1102,19 +1075,26 @@ struct rmap_iterator { * * Returns sptep if found, NULL otherwise. */ -static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) +static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, + struct rmap_iterator *iter) { - if (!rmap) + u64 *sptep; + + if (!rmap_head->val) return NULL; - if (!(rmap & 1)) { + if (!(rmap_head->val & 1)) { iter->desc = NULL; - return (u64 *)rmap; + sptep = (u64 *)rmap_head->val; + goto out; } - iter->desc = (struct pte_list_desc *)(rmap & ~1ul); + iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); iter->pos = 0; - return iter->desc->sptes[iter->pos]; + sptep = iter->desc->sptes[iter->pos]; +out: + BUG_ON(!is_shadow_present_pte(*sptep)); + return sptep; } /* @@ -1124,14 +1104,14 @@ static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) */ static u64 *rmap_get_next(struct rmap_iterator *iter) { + u64 *sptep; + if (iter->desc) { if (iter->pos < PTE_LIST_EXT - 1) { - u64 *sptep; - ++iter->pos; sptep = iter->desc->sptes[iter->pos]; if (sptep) - return sptep; + goto out; } iter->desc = iter->desc->more; @@ -1139,17 +1119,20 @@ static u64 *rmap_get_next(struct rmap_iterator *iter) if (iter->desc) { iter->pos = 0; /* desc->sptes[0] cannot be NULL */ - return iter->desc->sptes[iter->pos]; + sptep = iter->desc->sptes[iter->pos]; + goto out; } } return NULL; +out: + BUG_ON(!is_shadow_present_pte(*sptep)); + return sptep; } -#define for_each_rmap_spte(_rmap_, _iter_, _spte_) \ - for (_spte_ = rmap_get_first(*_rmap_, _iter_); \ - _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ - _spte_ = rmap_get_next(_iter_)) +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ + _spte_; _spte_ = rmap_get_next(_iter_)) static void drop_spte(struct kvm *kvm, u64 *sptep) { @@ -1207,14 +1190,15 @@ static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect) return mmu_spte_update(sptep, spte); } -static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, +static bool __rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, bool pt_protect) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_write_protect(kvm, sptep, pt_protect); return flush; @@ -1231,13 +1215,13 @@ static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp) +static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_clear_dirty(kvm, sptep); return flush; @@ -1254,13 +1238,13 @@ static bool spte_set_dirty(struct kvm *kvm, u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp) +static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_set_dirty(kvm, sptep); return flush; @@ -1280,12 +1264,12 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; while (mask) { - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_write_protect(kvm, rmapp, false); + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_write_protect(kvm, rmap_head, false); /* clear the first set bit */ mask &= mask - 1; @@ -1305,12 +1289,12 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; while (mask) { - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_clear_dirty(kvm, rmapp); + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_clear_dirty(kvm, rmap_head); /* clear the first set bit */ mask &= mask - 1; @@ -1342,28 +1326,27 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) { struct kvm_memory_slot *slot; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; int i; bool write_protected = false; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - rmapp = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true); + rmap_head = __gfn_to_rmap(gfn, i, slot); + write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true); } return write_protected; } -static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) +static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - while ((sptep = rmap_get_first(*rmapp, &iter))) { - BUG_ON(!(*sptep & PT_PRESENT_MASK)); + while ((sptep = rmap_get_first(rmap_head, &iter))) { rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); drop_spte(kvm, sptep); @@ -1373,14 +1356,14 @@ static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) return flush; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { - return kvm_zap_rmapp(kvm, rmapp); + return kvm_zap_rmapp(kvm, rmap_head); } -static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1395,7 +1378,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, new_pfn = pte_pfn(*ptep); restart: - for_each_rmap_spte(rmapp, &iter, sptep) { + for_each_rmap_spte(rmap_head, &iter, sptep) { rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", sptep, *sptep, gfn, level); @@ -1433,11 +1416,11 @@ struct slot_rmap_walk_iterator { /* output fields. */ gfn_t gfn; - unsigned long *rmap; + struct kvm_rmap_head *rmap; int level; /* private field. */ - unsigned long *end_rmap; + struct kvm_rmap_head *end_rmap; }; static void @@ -1496,7 +1479,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, unsigned long end, unsigned long data, int (*handler)(struct kvm *kvm, - unsigned long *rmapp, + struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, @@ -1540,7 +1523,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, - int (*handler)(struct kvm *kvm, unsigned long *rmapp, + int (*handler)(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data)) @@ -1563,7 +1547,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1573,18 +1557,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, BUG_ON(!shadow_accessed_mask); - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (*sptep & shadow_accessed_mask) { young = 1; clear_bit((ffs(shadow_accessed_mask) - 1), (unsigned long *)sptep); } + } trace_kvm_age_page(gfn, level, slot, young); return young; } -static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1600,11 +1585,12 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, if (!shadow_accessed_mask) goto out; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (*sptep & shadow_accessed_mask) { young = 1; break; } + } out: return young; } @@ -1613,14 +1599,14 @@ out: static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *sp; sp = page_header(__pa(spte)); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); + kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0); kvm_flush_remote_tlbs(vcpu->kvm); } @@ -1720,8 +1706,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp, mmu_spte_clear_no_track(parent_pte); } -static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, - u64 *parent_pte, int direct) +static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct) { struct kvm_mmu_page *sp; @@ -1737,8 +1722,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, * this feature. See the comments in kvm_zap_obsolete_pages(). */ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - sp->parent_ptes = 0; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; } @@ -1746,7 +1729,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, static void mark_unsync(u64 *spte); static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { - pte_list_walk(&sp->parent_ptes, mark_unsync); + u64 *sptep; + struct rmap_iterator iter; + + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { + mark_unsync(sptep); + } } static void mark_unsync(u64 *spte) @@ -1806,6 +1794,13 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, return (pvec->nr == KVM_PAGE_ARRAY_NR); } +static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) +{ + --sp->unsync_children; + WARN_ON((int)sp->unsync_children < 0); + __clear_bit(idx, sp->unsync_child_bitmap); +} + static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { @@ -1815,8 +1810,10 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_page *child; u64 ent = sp->spt[i]; - if (!is_shadow_present_pte(ent) || is_large_pte(ent)) - goto clear_child_bitmap; + if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { + clear_unsync_child_bit(sp, i); + continue; + } child = page_header(ent & PT64_BASE_ADDR_MASK); @@ -1825,28 +1822,21 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, return -ENOSPC; ret = __mmu_unsync_walk(child, pvec); - if (!ret) - goto clear_child_bitmap; - else if (ret > 0) + if (!ret) { + clear_unsync_child_bit(sp, i); + continue; + } else if (ret > 0) { nr_unsync_leaf += ret; - else + } else return ret; } else if (child->unsync) { nr_unsync_leaf++; if (mmu_pages_add(pvec, child, i)) return -ENOSPC; } else - goto clear_child_bitmap; - - continue; - -clear_child_bitmap: - __clear_bit(i, sp->unsync_child_bitmap); - sp->unsync_children--; - WARN_ON((int)sp->unsync_children < 0); + clear_unsync_child_bit(sp, i); } - return nr_unsync_leaf; } @@ -2009,9 +1999,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents) if (!sp) return; - --sp->unsync_children; - WARN_ON((int)sp->unsync_children < 0); - __clear_bit(idx, sp->unsync_child_bitmap); + clear_unsync_child_bit(sp, idx); level++; } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); } @@ -2053,14 +2041,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, } } -static void init_shadow_page_table(struct kvm_mmu_page *sp) -{ - int i; - - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) - sp->spt[i] = 0ull; -} - static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) { sp->write_flooding_count = 0; @@ -2083,8 +2063,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gva_t gaddr, unsigned level, int direct, - unsigned access, - u64 *parent_pte) + unsigned access) { union kvm_mmu_page_role role; unsigned quadrant; @@ -2116,21 +2095,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) break; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); - if (sp->unsync_children) { + if (sp->unsync_children) kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_mmu_mark_parents_unsync(sp); - } else if (sp->unsync) - kvm_mmu_mark_parents_unsync(sp); __clear_sp_write_flooding_count(sp); trace_kvm_mmu_get_page(sp, false); return sp; } + ++vcpu->kvm->stat.mmu_cache_miss; - sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); - if (!sp) - return sp; + + sp = kvm_mmu_alloc_page(vcpu, direct); + sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, @@ -2144,7 +2120,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, account_shadowed(vcpu->kvm, sp); } sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; - init_shadow_page_table(sp); + clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); return sp; } @@ -2198,7 +2174,8 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) return __shadow_walk_next(iterator, *iterator->sptep); } -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) +static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, + struct kvm_mmu_page *sp) { u64 spte; @@ -2206,12 +2183,14 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | - shadow_user_mask | shadow_x_mask; - - if (accessed) - spte |= shadow_accessed_mask; + shadow_user_mask | shadow_x_mask | shadow_accessed_mask; mmu_spte_set(sptep, spte); + + mmu_page_add_parent_pte(vcpu, sp, sptep); + + if (sp->unsync_children || sp->unsync) + mark_unsync(sptep); } static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, @@ -2270,17 +2249,12 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, mmu_page_zap_pte(kvm, sp, sp->spt + i); } -static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) -{ - mmu_page_remove_parent_pte(sp, parent_pte); -} - static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) { u64 *sptep; struct rmap_iterator iter; - while ((sptep = rmap_get_first(sp->parent_ptes, &iter))) + while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) drop_parent_pte(sp, sptep); } @@ -2564,18 +2538,18 @@ done: return ret; } -static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pte_access, int write_fault, int *emulate, - int level, gfn_t gfn, pfn_t pfn, bool speculative, - bool host_writable) +static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, + int write_fault, int level, gfn_t gfn, pfn_t pfn, + bool speculative, bool host_writable) { int was_rmapped = 0; int rmap_count; + bool emulate = false; pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, *sptep, write_fault, gfn); - if (is_rmap_spte(*sptep)) { + if (is_shadow_present_pte(*sptep)) { /* * If we overwrite a PTE page pointer with a 2MB PMD, unlink * the parent of the now unreachable PTE. @@ -2600,12 +2574,12 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, true, host_writable)) { if (write_fault) - *emulate = 1; + emulate = true; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } - if (unlikely(is_mmio_spte(*sptep) && emulate)) - *emulate = 1; + if (unlikely(is_mmio_spte(*sptep))) + emulate = true; pgprintk("%s: setting spte %llx\n", __func__, *sptep); pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", @@ -2624,6 +2598,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, } kvm_release_pfn_clean(pfn); + + return emulate; } static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, @@ -2658,9 +2634,8 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, return -1; for (i = 0; i < ret; i++, gfn++, start++) - mmu_set_spte(vcpu, start, access, 0, NULL, - sp->role.level, gfn, page_to_pfn(pages[i]), - true, true); + mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, + page_to_pfn(pages[i]), true, true); return 0; } @@ -2708,9 +2683,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } -static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, - int map_writable, int level, gfn_t gfn, pfn_t pfn, - bool prefault) +static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, + int level, gfn_t gfn, pfn_t pfn, bool prefault) { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; @@ -2722,9 +2696,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { - mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, - write, &emulate, level, gfn, pfn, - prefault, map_writable); + emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, + write, level, gfn, pfn, prefault, + map_writable); direct_pte_prefetch(vcpu, iterator.sptep); ++vcpu->stat.pf_fixed; break; @@ -2737,10 +2711,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, base_addr &= PT64_LVL_ADDR_MASK(iterator.level); pseudo_gfn = base_addr >> PAGE_SHIFT; sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, - iterator.level - 1, - 1, ACC_ALL, iterator.sptep); + iterator.level - 1, 1, ACC_ALL); - link_shadow_page(iterator.sptep, sp, true); + link_shadow_page(vcpu, iterator.sptep, sp); } } return emulate; @@ -2919,7 +2892,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, * If the mapping has been changed, let the vcpu fault on the * same address again. */ - if (!is_rmap_spte(spte)) { + if (!is_shadow_present_pte(spte)) { ret = true; goto exit; } @@ -3018,11 +2991,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, - prefault); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); spin_unlock(&vcpu->kvm->mmu_lock); - return r; out_unlock: @@ -3097,8 +3068,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, - 1, ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = __pa(sp->spt); @@ -3110,9 +3080,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), - i << 30, - PT32_ROOT_LEVEL, 1, ACC_ALL, - NULL); + i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3149,7 +3117,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, - 0, ACC_ALL, NULL); + 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3182,9 +3150,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) } spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, - PT32_ROOT_LEVEL, 0, - ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, + 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3531,8 +3498,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, - level, gfn, pfn, prefault); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); spin_unlock(&vcpu->kvm->mmu_lock); return r; @@ -4495,7 +4461,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) } /* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap); +typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); /* The caller should hold mmu-lock before calling this function. */ static bool @@ -4589,9 +4555,10 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) spin_unlock(&kvm->mmu_lock); } -static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp) +static bool slot_rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head) { - return __rmap_write_protect(kvm, rmapp, false); + return __rmap_write_protect(kvm, rmap_head, false); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, @@ -4627,7 +4594,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, } static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, - unsigned long *rmapp) + struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; @@ -4636,7 +4603,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, struct kvm_mmu_page *sp; restart: - for_each_rmap_spte(rmapp, &iter, sptep) { + for_each_rmap_spte(rmap_head, &iter, sptep) { sp = page_header(__pa(sptep)); pfn = spte_to_pfn(*sptep); diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 03d518e499a6..1cee3ec20dd2 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -129,7 +129,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) { static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *rev_sp; struct kvm_memslots *slots; struct kvm_memory_slot *slot; @@ -150,8 +150,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) return; } - rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot); - if (!*rmapp) { + rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot); + if (!rmap_head->val) { if (!__ratelimit(&ratelimit_state)) return; audit_printk(kvm, "no rmap for writable spte %llx\n", @@ -183,7 +183,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) return; for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { - if (!is_rmap_spte(sp->spt[i])) + if (!is_shadow_present_pte(sp->spt[i])) continue; inspect_spte_has_rmap(kvm, sp->spt + i); @@ -192,7 +192,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; u64 *sptep; struct rmap_iterator iter; struct kvm_memslots *slots; @@ -203,13 +203,14 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); - rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); + rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (is_writable_pte(*sptep)) audit_printk(kvm, "shadow page has writable " "mappings: gfn %llx role %x\n", sp->gfn, sp->role.word); + } } static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3058a22a658d..91e939b486d1 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -475,8 +475,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. */ - mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, - gfn, pfn, true, true); + mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + true, true); return true; } @@ -556,7 +556,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; - int top_level, emulate = 0; + int top_level, emulate; direct_access = gw->pte_access; @@ -587,7 +587,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (!is_shadow_present_pte(*it.sptep)) { table_gfn = gw->table_gfn[it.level - 2]; sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, - false, access, it.sptep); + false, access); } /* @@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, goto out_gpte_changed; if (sp) - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); + link_shadow_page(vcpu, it.sptep, sp); } for (; @@ -617,20 +617,18 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, - true, direct_access, it.sptep); - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); + true, direct_access); + link_shadow_page(vcpu, it.sptep, sp); } clear_sp_write_flooding_count(it.sptep); - mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, - it.level, gw->gfn, pfn, prefault, map_writable); + emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, + it.level, gw->gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); return emulate; out_gpte_changed: - if (sp) - kvm_mmu_put_page(sp, it.sptep); kvm_release_pfn_clean(pfn); return 0; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 83a1c643f9a5..af342150558a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -86,6 +86,7 @@ static const u32 host_save_user_msrs[] = { MSR_FS_BASE, #endif MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_TSC_AUX, }; #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) @@ -135,6 +136,7 @@ struct vcpu_svm { uint64_t asid_generation; uint64_t sysenter_esp; uint64_t sysenter_eip; + uint64_t tsc_aux; u64 next_rip; @@ -1238,6 +1240,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); } } + /* This assumes that the kernel never uses MSR_TSC_AUX */ + if (static_cpu_has(X86_FEATURE_RDTSCP)) + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -3024,6 +3029,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: msr_info->data = svm->sysenter_esp; break; + case MSR_TSC_AUX: + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + return 1; + msr_info->data = svm->tsc_aux; + break; /* * Nobody will change the following 5 values in the VMCB so we can * safely return them on rdmsr. They will always be 0 until LBRV is @@ -3145,6 +3155,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp = data; svm->vmcb->save.sysenter_esp = data; break; + case MSR_TSC_AUX: + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + return 1; + + /* + * This is rare, so we update the MSR here instead of using + * direct_access_msrs. Doing that would require a rdmsr in + * svm_vcpu_put. + */ + svm->tsc_aux = data; + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + break; case MSR_IA32_DEBUGCTLMSR: if (!boot_cpu_has(X86_FEATURE_LBRV)) { vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", @@ -3559,12 +3581,16 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) return; } -static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) +static bool svm_get_enable_apicv(void) +{ + return false; +} + +static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { - return 0; } -static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu) +static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { return; } @@ -4037,7 +4063,7 @@ static int svm_get_lpage_level(void) static bool svm_rdtscp_supported(void) { - return false; + return boot_cpu_has(X86_FEATURE_RDTSCP); } static bool svm_invpcid_supported(void) @@ -4328,7 +4354,8 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, - .cpu_uses_apicv = svm_cpu_uses_apicv, + .get_enable_apicv = svm_get_enable_apicv, + .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, .sync_pir_to_irr = svm_sync_pir_to_irr, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 120302511802..ab9ae67a80e4 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -268,7 +268,7 @@ TRACE_EVENT(kvm_inj_virq, #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(MC) + EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af823a388c19..62d958a1ec0f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -19,6 +19,7 @@ #include "irq.h" #include "mmu.h" #include "cpuid.h" +#include "lapic.h" #include <linux/kvm_host.h> #include <linux/module.h> @@ -862,7 +863,6 @@ static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); -static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -870,7 +870,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); -static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); @@ -1448,7 +1447,51 @@ static inline void ept_sync_context(u64 eptp) } } -static __always_inline unsigned long vmcs_readl(unsigned long field) +static __always_inline void vmcs_check16(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, + "16-bit accessor invalid for 64-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "16-bit accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "16-bit accessor invalid for 32-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "16-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_check32(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "32-bit accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "32-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_check64(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "64-bit accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "64-bit accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "64-bit accessor invalid for 32-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "64-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_checkl(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "Natural width accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, + "Natural width accessor invalid for 64-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "Natural width accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "Natural width accessor invalid for 32-bit field"); +} + +static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; @@ -1459,23 +1502,32 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) static __always_inline u16 vmcs_read16(unsigned long field) { - return vmcs_readl(field); + vmcs_check16(field); + return __vmcs_readl(field); } static __always_inline u32 vmcs_read32(unsigned long field) { - return vmcs_readl(field); + vmcs_check32(field); + return __vmcs_readl(field); } static __always_inline u64 vmcs_read64(unsigned long field) { + vmcs_check64(field); #ifdef CONFIG_X86_64 - return vmcs_readl(field); + return __vmcs_readl(field); #else - return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32); + return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32); #endif } +static __always_inline unsigned long vmcs_readl(unsigned long field) +{ + vmcs_checkl(field); + return __vmcs_readl(field); +} + static noinline void vmwrite_error(unsigned long field, unsigned long value) { printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", @@ -1483,7 +1535,7 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value) dump_stack(); } -static void vmcs_writel(unsigned long field, unsigned long value) +static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { u8 error; @@ -1493,33 +1545,46 @@ static void vmcs_writel(unsigned long field, unsigned long value) vmwrite_error(field, value); } -static void vmcs_write16(unsigned long field, u16 value) +static __always_inline void vmcs_write16(unsigned long field, u16 value) { - vmcs_writel(field, value); + vmcs_check16(field); + __vmcs_writel(field, value); } -static void vmcs_write32(unsigned long field, u32 value) +static __always_inline void vmcs_write32(unsigned long field, u32 value) { - vmcs_writel(field, value); + vmcs_check32(field); + __vmcs_writel(field, value); } -static void vmcs_write64(unsigned long field, u64 value) +static __always_inline void vmcs_write64(unsigned long field, u64 value) { - vmcs_writel(field, value); + vmcs_check64(field); + __vmcs_writel(field, value); #ifndef CONFIG_X86_64 asm volatile (""); - vmcs_writel(field+1, value >> 32); + __vmcs_writel(field+1, value >> 32); #endif } -static void vmcs_clear_bits(unsigned long field, u32 mask) +static __always_inline void vmcs_writel(unsigned long field, unsigned long value) { - vmcs_writel(field, vmcs_readl(field) & ~mask); + vmcs_checkl(field); + __vmcs_writel(field, value); } -static void vmcs_set_bits(unsigned long field, u32 mask) +static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) { - vmcs_writel(field, vmcs_readl(field) | mask); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, + "vmcs_clear_bits does not support 64-bit fields"); + __vmcs_writel(field, __vmcs_readl(field) & ~mask); +} + +static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, + "vmcs_set_bits does not support 64-bit fields"); + __vmcs_writel(field, __vmcs_readl(field) | mask); } static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) @@ -2498,7 +2563,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - if (vmx_cpu_uses_apicv(&vmx->vcpu)) + if (kvm_vcpu_apicv_active(&vmx->vcpu)) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_POSTED_INTR; @@ -4462,9 +4527,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, MSR_TYPE_W); } -static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) +static bool vmx_get_enable_apicv(void) { - return enable_apicv && lapic_in_kernel(vcpu); + return enable_apicv; } static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) @@ -4586,11 +4651,6 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); } -static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) -{ - return; -} - /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -4660,11 +4720,18 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) { u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; return pin_based_exec_ctrl; } +static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -4703,7 +4770,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; @@ -4767,7 +4834,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control(vmx)); - if (vmx_cpu_uses_apicv(&vmx->vcpu)) { + if (kvm_vcpu_apicv_active(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); @@ -4775,7 +4842,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(GUEST_INTR_STATUS, 0); - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } @@ -4867,7 +4934,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) seg_setup(VCPU_SREG_CS); vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_write32(GUEST_CS_BASE, 0xffff0000); + vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -4903,7 +4970,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); - vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); setup_msrs(vmx); @@ -4919,7 +4986,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (vmx_cpu_uses_apicv(vcpu)) + if (kvm_vcpu_apicv_active(vcpu)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); if (vmx->vpid != 0) @@ -6203,15 +6270,6 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - if (enable_apicv) - kvm_x86_ops->update_cr8_intercept = NULL; - else { - kvm_x86_ops->hwapic_irr_update = NULL; - kvm_x86_ops->hwapic_isr_update = NULL; - kvm_x86_ops->deliver_posted_interrupt = NULL; - kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; - } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); vmx_disable_intercept_for_msr(MSR_GS_BASE, false); vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); @@ -7901,7 +7959,7 @@ static void dump_vmcs(void) u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); u32 secondary_exec_control = 0; unsigned long cr4 = vmcs_readl(GUEST_CR4); - u64 efer = vmcs_readl(GUEST_IA32_EFER); + u64 efer = vmcs_read64(GUEST_IA32_EFER); int i, n; if (cpu_has_secondary_exec_ctrls()) @@ -7917,10 +7975,10 @@ static void dump_vmcs(void) if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) { - pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n", - vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1)); - pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n", - vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3)); + pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); + pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); } pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); @@ -7941,16 +7999,16 @@ static void dump_vmcs(void) vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) - pr_err("EFER = 0x%016llx PAT = 0x%016lx\n", - efer, vmcs_readl(GUEST_IA32_PAT)); - pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", - vmcs_readl(GUEST_IA32_DEBUGCTL), + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + efer, vmcs_read64(GUEST_IA32_PAT)); + pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", + vmcs_read64(GUEST_IA32_DEBUGCTL), vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016lx\n", - vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL)); + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) - pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS)); + pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); pr_err("Interruptibility = %08x ActivityState = %08x\n", vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), vmcs_read32(GUEST_ACTIVITY_STATE)); @@ -7979,11 +8037,12 @@ static void dump_vmcs(void) vmcs_read32(HOST_IA32_SYSENTER_CS), vmcs_readl(HOST_IA32_SYSENTER_EIP)); if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) - pr_err("EFER = 0x%016lx PAT = 0x%016lx\n", - vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT)); + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + vmcs_read64(HOST_IA32_EFER), + vmcs_read64(HOST_IA32_PAT)); if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016lx\n", - vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL)); + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); pr_err("*** Control State ***\n"); pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", @@ -8006,16 +8065,16 @@ static void dump_vmcs(void) pr_err("IDTVectoring: info=%08x errcode=%08x\n", vmcs_read32(IDT_VECTORING_INFO_FIELD), vmcs_read32(IDT_VECTORING_ERROR_CODE)); - pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); + pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) - pr_err("TSC Multiplier = 0x%016lx\n", - vmcs_readl(TSC_MULTIPLIER)); + pr_err("TSC Multiplier = 0x%016llx\n", + vmcs_read64(TSC_MULTIPLIER)); if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) - pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER)); + pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); n = vmcs_read32(CR3_TARGET_COUNT); for (i = 0; i + 1 < n; i += 4) pr_err("CR3 target%u=%016lx target%u=%016lx\n", @@ -8152,7 +8211,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) * apicv */ if (!cpu_has_vmx_virtualize_x2apic_mode() || - !vmx_cpu_uses_apicv(vcpu)) + !kvm_vcpu_apicv_active(vcpu)) return; if (!cpu_need_tpr_shadow(vcpu)) @@ -8257,10 +8316,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) } } -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu) +static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { - u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap; - if (!vmx_cpu_uses_apicv(vcpu)) + if (!kvm_vcpu_apicv_active(vcpu)) return; vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); @@ -9507,7 +9565,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) */ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; vmx->nested.pi_pending = false; - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write64(POSTED_INTR_DESC_ADDR, page_to_phys(vmx->nested.pi_desc_page) + (unsigned long)(vmcs12->posted_intr_desc_addr & @@ -10168,7 +10226,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * Additionally, restore L2's PDPTR to vmcs12. */ if (enable_ept) { - vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); + vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); @@ -10804,7 +10862,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .cpu_uses_apicv = vmx_cpu_uses_apicv, + .get_enable_apicv = vmx_get_enable_apicv, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eed32283d22c..b6102c1eb3b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -951,7 +951,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, }; static unsigned num_msrs_to_save; @@ -966,6 +966,8 @@ static u32 emulated_msrs[] = { HV_X64_MSR_RESET, HV_X64_MSR_VP_INDEX, HV_X64_MSR_VP_RUNTIME, + HV_X64_MSR_SCONTROL, + HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -2198,6 +2200,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_set_msr_common(vcpu, msr, data, msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: @@ -2402,6 +2405,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_get_msr_common(vcpu, msr_info->index, &msr_info->data); break; @@ -2541,6 +2545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: + case KVM_CAP_HYPERV_SYNIC: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -2748,7 +2753,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - kvm_x86_ops->sync_pir_to_irr(vcpu); + if (vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); + memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); return 0; @@ -3191,6 +3198,20 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) return 0; } +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_HYPERV_SYNIC: + return kvm_hv_activate_synic(vcpu); + default: + return -EINVAL; + } +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3455,6 +3476,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_set_guest_paused(vcpu); goto out; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: r = -EINVAL; } @@ -4001,16 +4031,17 @@ static void kvm_init_msr_list(void) /* * Even MSRs that are valid in the host may not be exposed - * to the guests in some cases. We could work around this - * in VMX with the generic MSR save/load machinery, but it - * is not really worthwhile since it will really only - * happen with nested virtualization. + * to the guests in some cases. */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: if (!kvm_x86_ops->mpx_supported()) continue; break; + case MSR_TSC_AUX: + if (!kvm_x86_ops->rdtscp_supported()) + continue; + break; default: break; } @@ -5867,6 +5898,12 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } +void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) +{ + vcpu->arch.apicv_active = false; + kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -5960,6 +5997,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) if (!vcpu->arch.apic) return; + if (vcpu->arch.apicv_active) + return; + if (!vcpu->arch.apic->vapic_addr) max_irr = kvm_lapic_find_highest_irr(vcpu); else @@ -6298,18 +6338,23 @@ static void process_smi(struct kvm_vcpu *vcpu) static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { + u64 eoi_exit_bitmap[4]; + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; - memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); + bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); if (irqchip_split(vcpu->kvm)) - kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); + kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { - kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); + if (vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } - kvm_x86_ops->load_eoi_exitmap(vcpu); + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, + vcpu_to_synic(vcpu)->vec_bitmap, 256); + kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) @@ -6417,7 +6462,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); if (test_bit(vcpu->arch.pending_ioapic_eoi, - (void *) vcpu->arch.eoi_exit_bitmap)) { + vcpu->arch.ioapic_handled_vectors)) { vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; vcpu->run->eoi.vector = vcpu->arch.pending_ioapic_eoi; @@ -6441,6 +6486,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv = vcpu->arch.hyperv.exit; + r = 0; + goto out; + } + if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) + kvm_hv_process_stimers(vcpu); } /* @@ -6452,7 +6505,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * Update architecture specific hints for APIC * virtual interrupt delivery. */ - if (kvm_x86_ops->hwapic_irr_update) + if (vcpu->arch.apicv_active) kvm_x86_ops->hwapic_irr_update(vcpu, kvm_lapic_find_highest_irr(vcpu)); } @@ -7523,6 +7576,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) BUG_ON(vcpu->kvm == NULL); kvm = vcpu->kvm; + vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(); vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) @@ -7580,6 +7634,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pending_external_vector = -1; + kvm_hv_vcpu_init(vcpu); + return 0; fail_free_mce_banks: @@ -7598,6 +7654,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { int idx; + kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); @@ -7992,6 +8049,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) kvm_cpu_has_interrupt(vcpu)) return true; + if (kvm_hv_has_stimer_pending(vcpu)) + return true; + return false; } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 3782636562a1..678663e2085f 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -63,10 +63,6 @@ enum hv_cpuid_function { /* Define version of the synthetic interrupt controller. */ #define HV_SYNIC_VERSION (1) -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* Define synthetic interrupt controller flag constants. */ @@ -74,48 +70,9 @@ enum hv_cpuid_function { #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32)) -/* Define hypervisor message types. */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -#define HV_SYNIC_STIMER_COUNT (4) - /* Define invalid partition identifier. */ #define HV_PARTITION_ID_INVALID ((u64)0x0) -/* Define port identifier type. */ -union hv_port_id { - u32 asu32; - struct { - u32 id:24; - u32 reserved:8; - } u ; -}; - /* Define port type. */ enum hv_port_type { HVPORT_MSG = 1, @@ -163,27 +120,6 @@ struct hv_connection_info { }; }; -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - u8 asu8; - struct { - u8 msg_pending:1; - u8 reserved:7; - }; -}; - -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - enum hv_message_type message_type; - u8 payload_size; - union hv_message_flags message_flags; - u8 reserved[2]; - union { - u64 sender; - union hv_port_id port; - }; -}; - /* * Timer configuration register. */ @@ -200,31 +136,9 @@ union hv_timer_config { }; }; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - u32 timer_index; - u32 reserved; - u64 expiration_time; /* When the timer expired */ - u64 delivery_time; /* When the message was delivered */ -}; - -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u ; -}; - /* Define the number of message buffers associated with each port. */ #define HV_PORT_MESSAGE_BUFFER_COUNT (16) -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -}; - /* Define the synthetic interrupt controller event flags format. */ union hv_synic_event_flags { u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT]; @@ -347,7 +261,7 @@ enum hv_call_code { struct hv_input_post_message { union hv_connection_id connectionid; u32 reserved; - enum hv_message_type message_type; + u32 message_type; u32 payload_size; u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; }; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 7bc6df3100ef..6804354c42bd 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -40,10 +40,14 @@ struct read_info_sccb { u8 fac85; /* 85 */ u8 _pad_86[91 - 86]; /* 86-90 */ u8 flags; /* 91 */ - u8 _pad_92[100 - 92]; /* 92-99 */ + u8 _pad_92[99 - 92]; /* 92-98 */ + u8 hamaxpow; /* 99 */ u32 rnsize2; /* 100-103 */ u64 rnmax2; /* 104-111 */ - u8 _pad_112[120 - 112]; /* 112-119 */ + u8 _pad_112[116 - 112]; /* 112-115 */ + u8 fac116; /* 116 */ + u8 _pad_117[119 - 117]; /* 117-118 */ + u8 fac119; /* 119 */ u16 hcpua; /* 120-121 */ u8 _pad_122[4096 - 122]; /* 122-4095 */ } __packed __aligned(PAGE_SIZE); @@ -108,6 +112,8 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.facilities = sccb->facilities; sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); + sclp.has_esca = !!(sccb->fac116 & 0x08); + sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; @@ -115,6 +121,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.rzm <<= 20; sclp.ibc = sccb->ibc; + if (sccb->hamaxpow && sccb->hamaxpow < 64) + sclp.hamax = (1UL << sccb->hamaxpow) - 1; + else + sclp.hamax = U64_MAX; + if (!sccb->hcpua) { if (MACHINE_IS_VM) sclp.max_cores = 64; @@ -131,6 +142,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) continue; sclp.has_siif = cpue->siif; sclp.has_sigpif = cpue->sigpif; + sclp.has_sief2 = cpue->sief2; break; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c923350ca20a..61c3e6c69f27 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -143,6 +143,8 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_HV_CRASH 27 #define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_REQ_HV_RESET 29 +#define KVM_REQ_HV_EXIT 30 +#define KVM_REQ_HV_STIMER 31 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -318,6 +320,11 @@ struct kvm_s390_adapter_int { u32 adapter_id; }; +struct kvm_hv_sint { + u32 vcpu; + u32 sint; +}; + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; @@ -331,6 +338,7 @@ struct kvm_kernel_irq_routing_entry { } irqchip; struct msi_msg msi; struct kvm_s390_adapter_int adapter; + struct kvm_hv_sint hv_sint; }; struct hlist_node link; }; @@ -439,10 +447,13 @@ struct kvm { /* The guest did something we don't support. */ #define vcpu_unimpl(vcpu, fmt, ...) \ - kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) + kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt, \ + (vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__) #define vcpu_debug(vcpu, fmt, ...) \ kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +#define vcpu_err(vcpu, fmt, ...) \ + kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { @@ -465,6 +476,11 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) struct kvm_vcpu *vcpu; int i; + if (id < 0 || id >= KVM_MAX_VCPUS) + return NULL; + vcpu = kvm_get_vcpu(kvm, id); + if (vcpu && vcpu->vcpu_id == id) + return vcpu; kvm_for_each_vcpu(i, vcpu, kvm) if (vcpu->vcpu_id == id) return vcpu; @@ -484,12 +500,12 @@ void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); -void kvm_arch_irq_routing_update(struct kvm *kvm); +void kvm_arch_post_irq_routing_update(struct kvm *kvm); #else static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { } -static inline void kvm_arch_irq_routing_update(struct kvm *kvm) +static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) { } #endif @@ -634,7 +650,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); -int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); +bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); @@ -1004,7 +1020,6 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; - struct dentry *dentry; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; @@ -1091,6 +1106,7 @@ static inline void kvm_irq_routing_update(struct kvm *kvm) { } #endif +void kvm_arch_irq_routing_update(struct kvm *kvm); static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 00a97bb905db..35e568f04b1e 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -4,10 +4,8 @@ #include <uapi/linux/kvm_para.h> -static inline int kvm_para_has_feature(unsigned int feature) +static inline bool kvm_para_has_feature(unsigned int feature) { - if (kvm_arch_para_features() & (1UL << feature)) - return 1; - return 0; + return !!(kvm_arch_para_features() & (1UL << feature)); } #endif /* __LINUX_KVM_PARA_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 03f3618612aa..6e32f7599081 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -154,6 +154,20 @@ struct kvm_s390_skeys { __u32 flags; __u32 reserved[9]; }; + +struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 + __u32 type; + union { + struct { + __u32 msr; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -184,6 +198,7 @@ struct kvm_s390_skeys { #define KVM_EXIT_SYSTEM_EVENT 24 #define KVM_EXIT_S390_STSI 25 #define KVM_EXIT_IOAPIC_EOI 26 +#define KVM_EXIT_HYPERV 27 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -338,6 +353,8 @@ struct kvm_run { struct { __u8 vector; } eoi; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; /* Fix the size of the union. */ char padding[256]; }; @@ -831,6 +848,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #define KVM_CAP_SPLIT_IRQCHIP 121 #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 +#define KVM_CAP_HYPERV_SYNIC 123 #ifdef KVM_CAP_IRQ_ROUTING @@ -854,10 +872,16 @@ struct kvm_irq_routing_s390_adapter { __u32 adapter_id; }; +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 struct kvm_irq_routing_entry { __u32 gsi; @@ -868,6 +892,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; __u32 pad[8]; } u; }; diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 77d42be6970e..353159922456 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -57,8 +57,7 @@ int kvm_async_pf_init(void) void kvm_async_pf_deinit(void) { - if (async_pf_cache) - kmem_cache_destroy(async_pf_cache); + kmem_cache_destroy(async_pf_cache); async_pf_cache = NULL; } diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index f0b08a2a48ba..fe84e1a95dd5 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -166,6 +166,10 @@ out: return r; } +void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm) +{ +} + int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *ue, unsigned nr, @@ -219,9 +223,10 @@ int kvm_set_irq_routing(struct kvm *kvm, old = kvm->irq_routing; rcu_assign_pointer(kvm->irq_routing, new); kvm_irq_routing_update(kvm); + kvm_arch_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); - kvm_arch_irq_routing_update(kvm); + kvm_arch_post_irq_routing_update(kvm); synchronize_srcu_expedited(&kvm->irq_srcu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 484079efea5b..be3cef12706c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1164,15 +1164,15 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn); } -int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) +bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS || memslot->flags & KVM_MEMSLOT_INVALID) - return 0; + return false; - return 1; + return true; } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); @@ -2257,7 +2257,7 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) { int r; - struct kvm_vcpu *vcpu, *v; + struct kvm_vcpu *vcpu; if (id >= KVM_MAX_VCPUS) return -EINVAL; @@ -2281,12 +2281,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) r = -EINVAL; goto unlock_vcpu_destroy; } - - kvm_for_each_vcpu(r, v, kvm) - if (v->vcpu_id == id) { - r = -EEXIST; - goto unlock_vcpu_destroy; - } + if (kvm_get_vcpu_by_id(kvm, id)) { + r = -EEXIST; + goto unlock_vcpu_destroy; + } BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); @@ -3449,10 +3447,9 @@ static int kvm_init_debug(void) goto out; for (p = debugfs_entries; p->name; ++p) { - p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, - (void *)(long)p->offset, - stat_fops[p->kind]); - if (p->dentry == NULL) + if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir, + (void *)(long)p->offset, + stat_fops[p->kind])) goto out_dir; } @@ -3464,15 +3461,6 @@ out: return r; } -static void kvm_exit_debug(void) -{ - struct kvm_stats_debugfs_item *p; - - for (p = debugfs_entries; p->name; ++p) - debugfs_remove(p->dentry); - debugfs_remove(kvm_debugfs_dir); -} - static int kvm_suspend(void) { if (kvm_usage_count) @@ -3630,7 +3618,7 @@ EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { - kvm_exit_debug(); + debugfs_remove_recursive(kvm_debugfs_dir); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); kvm_async_pf_deinit(); |