 arch/x86/include/asm/kvm_host.h |  4
 arch/x86/kvm/mmu.c              | 61
 arch/x86/kvm/paging_tmpl.h      | 25
 arch/x86/kvm/vmx.c              | 19
 4 files changed, 95 insertions, 14 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f5df0a84e51c..c0efd16bdfa1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,6 +286,7 @@ struct kvm_mmu {
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
+	u64 bad_mt_xwr;
 
 	/*
 	 * Bitmap: bit set = last pte in walk
@@ -512,6 +513,9 @@ struct kvm_vcpu_arch {
 	 * instruction.
 	 */
 	bool write_fault_to_shadow_pgtable;
+
+	/* set at EPT violation at this point */
+	unsigned long exit_qualification;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 75d843bb4ca3..a215c41b5176 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3519,6 +3519,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
 
+	context->bad_mt_xwr = 0;
+
 	if (!context->nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
 	switch (context->root_level) {
@@ -3574,7 +3576,40 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *context, bool execonly)
+{
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	int pte;
+
+	context->rsvd_bits_mask[0][3] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
+	context->rsvd_bits_mask[0][2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
+	context->rsvd_bits_mask[0][1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+
+	/* large page */
+	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+	context->rsvd_bits_mask[1][2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
+	context->rsvd_bits_mask[1][1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
+	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+
+	for (pte = 0; pte < 64; pte++) {
+		int rwx_bits = pte & 7;
+		int mt = pte >> 3;
+		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
+				rwx_bits == 0x2 || rwx_bits == 0x6 ||
+				(rwx_bits == 0x4 && !execonly))
+			context->bad_mt_xwr |= (1ull << pte);
+	}
+}
+
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *mmu, bool ept)
 {
 	unsigned bit, byte, pfec;
 	u8 map;
@@ -3592,12 +3627,16 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu
 			w = bit & ACC_WRITE_MASK;
 			u = bit & ACC_USER_MASK;
 
-			/* Not really needed: !nx will cause pte.nx to fault */
-			x |= !mmu->nx;
-			/* Allow supervisor writes if !cr0.wp */
-			w |= !is_write_protection(vcpu) && !uf;
-			/* Disallow supervisor fetches of user code if cr4.smep */
-			x &= !(smep && u && !uf);
+			if (!ept) {
+				/* Not really needed: !nx will cause pte.nx to fault */
+				x |= !mmu->nx;
+				/* Allow supervisor writes if !cr0.wp */
+				w |= !is_write_protection(vcpu) && !uf;
+				/* Disallow supervisor fetches of user code if cr4.smep */
+				x &= !(smep && u && !uf);
+			} else
+				/* Not really needed: no U/S accesses on ept  */
+				u = 1;
 
 			fault = (ff && !x) || (uf && !u) || (wf && !w);
 			map |= fault << bit;
@@ -3632,7 +3671,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->root_level = level;
 
 	reset_rsvds_bits_mask(vcpu, context);
-	update_permission_bitmask(vcpu, context);
+	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
@@ -3662,7 +3701,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = PT32_ROOT_LEVEL;
 
 	reset_rsvds_bits_mask(vcpu, context);
-	update_permission_bitmask(vcpu, context);
+	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 
 	context->new_cr3 = paging_new_cr3;
@@ -3724,7 +3763,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
-	update_permission_bitmask(vcpu, context);
+	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 
 	return 0;
@@ -3803,7 +3842,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}
 
-	update_permission_bitmask(vcpu, g_context);
+	update_permission_bitmask(vcpu, g_context, false);
 	update_last_pte_bitmap(vcpu, g_context);
 
 	return 0;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 50b8679d4dd8..043330159179 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -129,10 +129,10 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
 
 static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
 {
-	int bit7;
+	int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;
 
-	bit7 = (gpte >> 7) & 1;
-	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
+	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) |
+		((mmu->bad_mt_xwr & (1ull << low6)) != 0);
 }
 
 static inline int FNAME(is_present_gpte)(unsigned long pte)
@@ -386,6 +386,25 @@ error:
 	walker->fault.vector = PF_VECTOR;
 	walker->fault.error_code_valid = true;
 	walker->fault.error_code = errcode;
+
+#if PTTYPE == PTTYPE_EPT
+	/*
+	 * Use PFERR_RSVD_MASK in error_code to to tell if EPT
+	 * misconfiguration requires to be injected. The detection is
+	 * done by is_rsvd_bits_set() above.
+	 *
+	 * We set up the value of exit_qualification to inject:
+	 * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
+	 * [5:3] - Calculated by the page walk of the guest EPT page tables
+	 * [7:8] - Derived from [7:8] of real exit_qualification
+	 *
+	 * The other bits are set to 0.
+	 */
+	if (!(errcode & PFERR_RSVD_MASK)) {
+		vcpu->arch.exit_qualification &= 0x187;
+		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+	}
+#endif
 	walker->fault.address = addr;
 	walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e3c8e28aeb35..0d18ed31671c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5317,9 +5317,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
 	/* It is a write fault? */
 	error_code = exit_qualification & (1U << 1);
+	/* It is a fetch fault? */
+	error_code |= (exit_qualification & (1U << 2)) << 2;
 	/* ept page table is present? */
 	error_code |= (exit_qualification >> 3) & 0x1;
 
+	vcpu->arch.exit_qualification = exit_qualification;
+
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
@@ -7348,6 +7352,21 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 		entry->ecx |= bit(X86_FEATURE_VMX);
 }
 
+static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
+		struct x86_exception *fault)
+{
+	struct vmcs12 *vmcs12;
+	nested_vmx_vmexit(vcpu);
+	vmcs12 = get_vmcs12(vcpu);
+
+	if (fault->error_code & PFERR_RSVD_MASK)
+		vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+	else
+		vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+	vmcs12->exit_qualification = vcpu->arch.exit_qualification;
+	vmcs12->guest_physical_address = fault->address;
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
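The low six bits of an EPT paging-structure entry hold the XWR permissions (bits 2:0) and, for leaf entries, the memory type (bits 5:3); reset_rsvds_bits_mask_ept() above precomputes which of the 64 possible combinations are illegal, and the walker error path folds the permissions found by the software walk of the guest's EPT tables into the exit qualification that nested_ept_inject_page_fault() hands back to L1. The stand-alone user-space sketch below is not part of the patch; the helper names build_bad_mt_xwr(), ept_misconfigured() and synth_exit_qualification() are invented for illustration, and only the bit arithmetic follows the code in the diff.

/*
 * Sketch of the bad_mt_xwr / exit_qualification bit manipulation from the
 * patch, as ordinary user-space C.  Not KVM code; helper names are made up.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * One bit per possible value of an EPT PTE's low 6 bits: XWR permissions in
 * bits 2:0, memory type in bits 5:3.  A set bit marks a combination that
 * causes an EPT misconfiguration, mirroring reset_rsvds_bits_mask_ept().
 */
static uint64_t build_bad_mt_xwr(bool execonly)
{
	uint64_t bad_mt_xwr = 0;
	int pte;

	for (pte = 0; pte < 64; pte++) {
		int rwx_bits = pte & 7;	/* bit 0 = R, bit 1 = W, bit 2 = X */
		int mt = pte >> 3;	/* EPT memory type */

		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||	/* reserved memory types */
		    rwx_bits == 0x2 || rwx_bits == 0x6 ||	/* write-only, write+exec */
		    (rwx_bits == 0x4 && !execonly))		/* exec-only unsupported */
			bad_mt_xwr |= 1ull << pte;
	}
	return bad_mt_xwr;
}

/* Mirror of the new bad_mt_xwr check added to FNAME(is_rsvd_bits_set). */
static bool ept_misconfigured(uint64_t bad_mt_xwr, uint64_t gpte)
{
	return (bad_mt_xwr & (1ull << (gpte & 0x3f))) != 0;
}

/*
 * Mirror of the PTTYPE_EPT walker error path: keep bits 2:0 (access type)
 * and bits 8:7 of the hardware-reported qualification (mask 0x187), and put
 * the permissions accumulated while walking the guest's EPT tables into
 * bits 5:3.
 */
static unsigned long synth_exit_qualification(unsigned long hw_qual,
					      unsigned int walk_access)
{
	unsigned long qual = hw_qual & 0x187;

	qual |= (walk_access & 0x7) << 3;
	return qual;
}

int main(void)
{
	uint64_t bad = build_bad_mt_xwr(false);

	/* Write-only (W=1, R=0) entries are always misconfigured. */
	printf("write-only gpte  -> misconfig=%d\n", ept_misconfigured(bad, 0x2));
	/* Read/write with write-back memory type (6) is a legal leaf entry. */
	printf("RW, WB gpte      -> misconfig=%d\n",
	       ept_misconfigured(bad, (6ull << 3) | 0x3));
	/* Execute-only is misconfigured unless exec-only EPT is supported. */
	printf("exec-only gpte   -> misconfig=%d\n", ept_misconfigured(bad, 0x4));

	/* A write access (bit 1 of the real qualification) to a GPA the guest
	 * EPT maps read-only (walk_access = 0x1) yields qualification 0xa. */
	printf("injected qual    -> %#lx\n", synth_exit_qualification(0x2, 0x1));
	return 0;
}

Compiled with any C compiler, the sample flags the write-only and execute-only entries as misconfigurations, accepts the read/write write-back entry, and prints 0xa for the synthesized qualification.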
