From 44900ba65e16ab3c6608e105654f38f54d030caa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Dec 2017 12:58:02 +0100 Subject: KVM: VMX: optimize shadow VMCS copying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because all fields can be read/written with a single vmread/vmwrite on 64-bit kernels, the switch statements in copy_vmcs12_to_shadow and copy_shadow_to_vmcs12 are unnecessary. What I did in this patch is to copy the two parts of 64-bit fields separately on 32-bit kernels, to keep all complicated #ifdef-ery in init_vmcs_shadow_fields. The disadvantage is that 64-bit fields have to be listed separately in shadow_read_only/read_write_fields, but those are few and we can validate the arrays when building the VMREAD and VMWRITE bitmaps. This saves a few hundred clock cycles per nested vmexit. However there is still a "switch" in vmcs_read_any and vmcs_write_any. So, while at it, this patch reorders the fields by type, hoping that the branch predictor appreciates it. Cc: Jim Mattson Cc: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 143 ++++++++++++++++++++++++----------------------------- 1 file changed, 65 insertions(+), 78 deletions(-) (limited to 'arch/x86/kvm/vmx.c') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7b114a293c66..bdd3f155efae 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -686,7 +686,7 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) [number##_HIGH] = VMCS12_OFFSET(name)+4 -static unsigned long shadow_read_only_fields[] = { +static u16 shadow_read_only_fields[] = { /* * We do NOT shadow fields that are modified when L0 * traps and emulates any vmx instruction (e.g. VMPTRLD, @@ -699,49 +699,59 @@ static unsigned long shadow_read_only_fields[] = { * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified * by nested_vmx_failValid) */ + /* 32-bits */ VM_EXIT_REASON, VM_EXIT_INTR_INFO, VM_EXIT_INSTRUCTION_LEN, IDT_VECTORING_INFO_FIELD, IDT_VECTORING_ERROR_CODE, VM_EXIT_INTR_ERROR_CODE, + + /* Natural width */ EXIT_QUALIFICATION, GUEST_LINEAR_ADDRESS, - GUEST_PHYSICAL_ADDRESS + + /* 64-bit */ + GUEST_PHYSICAL_ADDRESS, + GUEST_PHYSICAL_ADDRESS_HIGH, }; static int max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields); -static unsigned long shadow_read_write_fields[] = { +static u16 shadow_read_write_fields[] = { + /* 16-bits */ + GUEST_CS_SELECTOR, + GUEST_INTR_STATUS, + GUEST_PML_INDEX, + HOST_FS_SELECTOR, + HOST_GS_SELECTOR, + + /* 32-bits */ + CPU_BASED_VM_EXEC_CONTROL, + EXCEPTION_BITMAP, + VM_ENTRY_EXCEPTION_ERROR_CODE, + VM_ENTRY_INTR_INFO_FIELD, + VM_ENTRY_INSTRUCTION_LEN, TPR_THRESHOLD, + GUEST_CS_LIMIT, + GUEST_CS_AR_BYTES, + GUEST_INTERRUPTIBILITY_INFO, + VMX_PREEMPTION_TIMER_VALUE, + + /* Natural width */ GUEST_RIP, GUEST_RSP, GUEST_CR0, GUEST_CR3, GUEST_CR4, - GUEST_INTERRUPTIBILITY_INFO, GUEST_RFLAGS, - GUEST_CS_SELECTOR, - GUEST_CS_AR_BYTES, - GUEST_CS_LIMIT, GUEST_CS_BASE, GUEST_ES_BASE, - GUEST_PML_INDEX, - GUEST_INTR_STATUS, - VMX_PREEMPTION_TIMER_VALUE, CR0_GUEST_HOST_MASK, CR0_READ_SHADOW, CR4_READ_SHADOW, - EXCEPTION_BITMAP, - CPU_BASED_VM_EXEC_CONTROL, - VM_ENTRY_EXCEPTION_ERROR_CODE, - VM_ENTRY_INTR_INFO_FIELD, - VM_ENTRY_INSTRUCTION_LEN, - VM_ENTRY_EXCEPTION_ERROR_CODE, HOST_FS_BASE, HOST_GS_BASE, - HOST_FS_SELECTOR, - HOST_GS_SELECTOR }; static int max_shadow_read_write_fields = ARRAY_SIZE(shadow_read_write_fields); @@ -3901,15 +3911,39 @@ static void init_vmcs_shadow_fields(void) { int i, j; - /* No checks for read only fields yet */ + for (i = j = 0; i < max_shadow_read_only_fields; i++) { + u16 field = shadow_read_only_fields[i]; + if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64 && + (i + 1 == max_shadow_read_only_fields || + shadow_read_only_fields[i + 1] != field + 1)) + pr_err("Missing field from shadow_read_only_field %x\n", + field + 1); + + clear_bit(field, vmx_vmread_bitmap); +#ifdef CONFIG_X86_64 + if (field & 1) + continue; +#endif + if (j < i) + shadow_read_only_fields[j] = field; + j++; + } + max_shadow_read_only_fields = j; for (i = j = 0; i < max_shadow_read_write_fields; i++) { + u16 field = shadow_read_write_fields[i]; + if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64 && + (i + 1 == max_shadow_read_write_fields || + shadow_read_write_fields[i + 1] != field + 1)) + pr_err("Missing field from shadow_read_write_field %x\n", + field + 1); + /* * PML and the preemption timer can be emulated, but the * processor cannot vmwrite to fields that don't exist * on bare metal. */ - switch (shadow_read_write_fields[i]) { + switch (field) { case GUEST_PML_INDEX: if (!cpu_has_vmx_pml()) continue; @@ -3926,31 +3960,17 @@ static void init_vmcs_shadow_fields(void) break; } + clear_bit(field, vmx_vmwrite_bitmap); + clear_bit(field, vmx_vmread_bitmap); +#ifdef CONFIG_X86_64 + if (field & 1) + continue; +#endif if (j < i) - shadow_read_write_fields[j] = - shadow_read_write_fields[i]; + shadow_read_write_fields[j] = field; j++; } max_shadow_read_write_fields = j; - - /* shadowed fields guest access without vmexit */ - for (i = 0; i < max_shadow_read_write_fields; i++) { - unsigned long field = shadow_read_write_fields[i]; - - clear_bit(field, vmx_vmwrite_bitmap); - clear_bit(field, vmx_vmread_bitmap); - if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) { - clear_bit(field + 1, vmx_vmwrite_bitmap); - clear_bit(field + 1, vmx_vmread_bitmap); - } - } - for (i = 0; i < max_shadow_read_only_fields; i++) { - unsigned long field = shadow_read_only_fields[i]; - - clear_bit(field, vmx_vmread_bitmap); - if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) - clear_bit(field + 1, vmx_vmread_bitmap); - } } static __init int alloc_kvm_area(void) @@ -7538,7 +7558,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) unsigned long field; u64 field_value; struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; - const unsigned long *fields = shadow_read_write_fields; + const u16 *fields = shadow_read_write_fields; const int num_fields = max_shadow_read_write_fields; preempt_disable(); @@ -7547,23 +7567,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) for (i = 0; i < num_fields; i++) { field = fields[i]; - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_U16: - field_value = vmcs_read16(field); - break; - case VMCS_FIELD_TYPE_U32: - field_value = vmcs_read32(field); - break; - case VMCS_FIELD_TYPE_U64: - field_value = vmcs_read64(field); - break; - case VMCS_FIELD_TYPE_NATURAL_WIDTH: - field_value = vmcs_readl(field); - break; - default: - WARN_ON(1); - continue; - } + field_value = __vmcs_readl(field); vmcs12_write_any(&vmx->vcpu, field, field_value); } @@ -7575,7 +7579,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) { - const unsigned long *fields[] = { + const u16 *fields[] = { shadow_read_write_fields, shadow_read_only_fields }; @@ -7594,24 +7598,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) for (i = 0; i < max_fields[q]; i++) { field = fields[q][i]; vmcs12_read_any(&vmx->vcpu, field, &field_value); - - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_U16: - vmcs_write16(field, (u16)field_value); - break; - case VMCS_FIELD_TYPE_U32: - vmcs_write32(field, (u32)field_value); - break; - case VMCS_FIELD_TYPE_U64: - vmcs_write64(field, (u64)field_value); - break; - case VMCS_FIELD_TYPE_NATURAL_WIDTH: - vmcs_writel(field, (long)field_value); - break; - default: - WARN_ON(1); - break; - } + __vmcs_writel(field, field_value); } } -- cgit