diff options
| author | Paul Mackerras <paulus@ozlabs.org> | 2018-01-30 10:51:32 +1100 | 
|---|---|---|
| committer | Paul Mackerras <paulus@ozlabs.org> | 2018-02-01 13:35:33 +1100 | 
| commit | 36ee41d161c67a6fcf696d4817a0da31f778938c (patch) | |
| tree | 790868f34e9c7ec17b6d53d17c61aa27d112586a | |
| parent | 9b9b13a6d1537ddc4caccd6f1c41b78edbc08437 (diff) | |
KVM: PPC: Book3S HV: Drop locks before reading guest memory
Running with CONFIG_DEBUG_ATOMIC_SLEEP reveals that HV KVM tries to
read guest memory, in order to emulate guest instructions, while
preempt is disabled and a vcore lock is held.  This occurs in
kvmppc_handle_exit_hv(), called from post_guest_process(), when
emulating guest doorbell instructions on POWER9 systems, and also
when checking whether we have hit a hypervisor breakpoint.
Reading guest memory can cause a page fault and thus cause the
task to sleep, so we need to avoid reading guest memory while
holding a spinlock or when preempt is disabled.
To fix this, we move the preempt_enable() in kvmppc_run_core() to
before the loop that calls post_guest_process() for each vcore that
has just run, and we drop and re-take the vcore lock around the calls
to kvmppc_emulate_debug_inst() and kvmppc_emulate_doorbell_instr().
Dropping the lock is safe with respect to the iteration over the
runnable vcpus in post_guest_process(); for_each_runnable_thread
is actually safe to use locklessly.  It is possible for a vcpu
to become runnable and add itself to the runnable_threads array
(code near the beginning of kvmppc_run_vcpu()) and then get included
in the iteration in post_guest_process() despite the fact that it
has not just run.  This is benign because vcpu->arch.trap and
vcpu->arch.ceded will be zero for such a vcpu.
Cc: stable@vger.kernel.org # v4.13+
Fixes: 579006944e0d ("KVM: PPC: Book3S HV: Virtualize doorbell facility on POWER9")
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
| -rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 16 | 
1 file changed, 12 insertions, 4 deletions
```diff
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e5f81fc108e0..aa6130b56b5e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1008,8 +1008,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu *tvcpu;
 
-	if (!cpu_has_feature(CPU_FTR_ARCH_300))
-		return EMULATE_FAIL;
 	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
 		return RESUME_GUEST;
 	if (get_op(inst) != 31)
@@ -1059,6 +1057,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+/* Called with vcpu->arch.vcore->lock held */
 static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				 struct task_struct *tsk)
 {
@@ -1179,7 +1178,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				swab32(vcpu->arch.emul_inst) :
 				vcpu->arch.emul_inst;
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+			/* Need vcore unlocked to call kvmppc_get_last_inst */
+			spin_unlock(&vcpu->arch.vcore->lock);
 			r = kvmppc_emulate_debug_inst(run, vcpu);
+			spin_lock(&vcpu->arch.vcore->lock);
 		} else {
 			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 			r = RESUME_GUEST;
@@ -1194,8 +1196,13 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 */
 	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
 		r = EMULATE_FAIL;
-		if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+		if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
+		    cpu_has_feature(CPU_FTR_ARCH_300)) {
+			/* Need vcore unlocked to call kvmppc_get_last_inst */
+			spin_unlock(&vcpu->arch.vcore->lock);
 			r = kvmppc_emulate_doorbell_instr(vcpu);
+			spin_lock(&vcpu->arch.vcore->lock);
+		}
 		if (r == EMULATE_FAIL) {
 			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
 			r = RESUME_GUEST;
@@ -2946,13 +2953,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
 
+	preempt_enable();
+
 	for (sub = 0; sub < core_info.n_subcores; ++sub) {
 		pvc = core_info.vc[sub];
 		post_guest_process(pvc, pvc == vc);
 	}
 
 	spin_lock(&vc->lock);
-	preempt_enable();
 
  out:
 	vc->vcore_state = VCORE_INACTIVE;
```
