summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_hv.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--arch/powerpc/kvm/book3s_hv.c533
1 files changed, 373 insertions, 160 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2fd5580c8f6e..3686471be32b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -53,11 +53,15 @@
#include <asm/smp.h>
#include <asm/dbell.h>
#include <asm/hmi.h>
+#include <asm/pnv-pci.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
#include <linux/module.h>
+#include <linux/compiler.h>
#include "book3s.h"
@@ -70,6 +74,8 @@
/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
+/* Used to indicate that a guest passthrough interrupt needs to be handled */
+#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2)
/* Used as a "null" value for timebase values */
#define TB_NIL (~(u64)0)
@@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = {
.get = param_get_int,
};
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
+
module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
+/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
+static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
+module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
+
+/* Factor by which the vcore halt poll interval is grown, default is to double
+ */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
+
+/* Factor by which the vcore halt poll interval is shrunk, default is to reset
+ */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
+ int *ip)
+{
+ int i = *ip;
+ struct kvm_vcpu *vcpu;
+
+ while (++i < MAX_SMT_THREADS) {
+ vcpu = READ_ONCE(vc->runnable_threads[i]);
+ if (vcpu) {
+ *ip = i;
+ return vcpu;
+ }
+ }
+ return NULL;
+}
+
+/* Used to traverse the list of runnable threads for a given vcore */
+#define for_each_runnable_thread(i, vcpu, vc) \
+ for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
+
static bool kvmppc_ipi_thread(int cpu)
{
/* On POWER8 for IPIs to threads in the same core, use msgsnd */
@@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
break;
+ case BOOK3S_INTERRUPT_HV_RM_HARD:
+ r = RESUME_PASSTHROUGH;
+ break;
default:
kvmppc_dump_regs(vcpu);
printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
@@ -1149,6 +1199,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_DPDES:
*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
break;
+ case KVM_REG_PPC_VTB:
+ *val = get_reg_val(id, vcpu->arch.vcore->vtb);
+ break;
case KVM_REG_PPC_DAWR:
*val = get_reg_val(id, vcpu->arch.dawr);
break;
@@ -1341,6 +1394,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_DPDES:
vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_VTB:
+ vcpu->arch.vcore->vtb = set_reg_val(id, *val);
+ break;
case KVM_REG_PPC_DAWR:
vcpu->arch.dawr = set_reg_val(id, *val);
break;
@@ -1493,7 +1549,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
if (vcore == NULL)
return NULL;
- INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
init_swait_queue_head(&vcore->wq);
@@ -1802,7 +1857,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
spin_unlock_irq(&vcpu->arch.tbacct_lock);
--vc->n_runnable;
- list_del(&vcpu->arch.run_list);
+ WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
}
static int kvmppc_grab_hwthread(int cpu)
@@ -2048,66 +2103,6 @@ static void init_master_vcore(struct kvmppc_vcore *vc)
vc->conferring_threads = 0;
}
-/*
- * See if the existing subcores can be split into 3 (or fewer) subcores
- * of at most two threads each, so we can fit in another vcore. This
- * assumes there are at most two subcores and at most 6 threads in total.
- */
-static bool can_split_piggybacked_subcores(struct core_info *cip)
-{
- int sub, new_sub;
- int large_sub = -1;
- int thr;
- int n_subcores = cip->n_subcores;
- struct kvmppc_vcore *vc, *vcnext;
- struct kvmppc_vcore *master_vc = NULL;
-
- for (sub = 0; sub < cip->n_subcores; ++sub) {
- if (cip->subcore_threads[sub] <= 2)
- continue;
- if (large_sub >= 0)
- return false;
- large_sub = sub;
- vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
- preempt_list);
- if (vc->num_threads > 2)
- return false;
- n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
- }
- if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2))
- return false;
-
- /*
- * Seems feasible, so go through and move vcores to new subcores.
- * Note that when we have two or more vcores in one subcore,
- * all those vcores must have only one thread each.
- */
- new_sub = cip->n_subcores;
- thr = 0;
- sub = large_sub;
- list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
- if (thr >= 2) {
- list_del(&vc->preempt_list);
- list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
- /* vc->num_threads must be 1 */
- if (++cip->subcore_threads[new_sub] == 1) {
- cip->subcore_vm[new_sub] = vc->kvm;
- init_master_vcore(vc);
- master_vc = vc;
- ++cip->n_subcores;
- } else {
- vc->master_vcore = master_vc;
- ++new_sub;
- }
- }
- thr += vc->num_threads;
- }
- cip->subcore_threads[large_sub] = 2;
- cip->max_subcore_threads = 2;
-
- return true;
-}
-
static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
{
int n_threads = vc->num_threads;
@@ -2118,23 +2113,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
if (n_threads < cip->max_subcore_threads)
n_threads = cip->max_subcore_threads;
- if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
- cip->max_subcore_threads = n_threads;
- } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
- vc->num_threads <= 2) {
- /*
- * We may be able to fit another subcore in by
- * splitting an existing subcore with 3 or 4
- * threads into two 2-thread subcores, or one
- * with 5 or 6 threads into three subcores.
- * We can only do this if those subcores have
- * piggybacked virtual cores.
- */
- if (!can_split_piggybacked_subcores(cip))
- return false;
- } else {
+ if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
return false;
- }
+ cip->max_subcore_threads = n_threads;
sub = cip->n_subcores;
++cip->n_subcores;
@@ -2148,43 +2129,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
return true;
}
-static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
- struct core_info *cip, int sub)
-{
- struct kvmppc_vcore *vc;
- int n_thr;
-
- vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
- preempt_list);
-
- /* require same VM and same per-core reg values */
- if (pvc->kvm != vc->kvm ||
- pvc->tb_offset != vc->tb_offset ||
- pvc->pcr != vc->pcr ||
- pvc->lpcr != vc->lpcr)
- return false;
-
- /* P8 guest with > 1 thread per core would see wrong TIR value */
- if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
- (vc->num_threads > 1 || pvc->num_threads > 1))
- return false;
-
- n_thr = cip->subcore_threads[sub] + pvc->num_threads;
- if (n_thr > cip->max_subcore_threads) {
- if (!subcore_config_ok(cip->n_subcores, n_thr))
- return false;
- cip->max_subcore_threads = n_thr;
- }
-
- cip->total_threads += pvc->num_threads;
- cip->subcore_threads[sub] = n_thr;
- pvc->master_vcore = vc;
- list_del(&pvc->preempt_list);
- list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
-
- return true;
-}
-
/*
* Work out whether it is possible to piggyback the execution of
* vcore *pvc onto the execution of the other vcores described in *cip.
@@ -2192,27 +2136,18 @@ static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
int target_threads)
{
- int sub;
-
if (cip->total_threads + pvc->num_threads > target_threads)
return false;
- for (sub = 0; sub < cip->n_subcores; ++sub)
- if (cip->subcore_threads[sub] &&
- can_piggyback_subcore(pvc, cip, sub))
- return true;
-
- if (can_dynamic_split(pvc, cip))
- return true;
- return false;
+ return can_dynamic_split(pvc, cip);
}
static void prepare_threads(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu, *vnext;
+ int i;
+ struct kvm_vcpu *vcpu;
- list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
else if (vcpu->arch.vpa.update_pending ||
@@ -2259,15 +2194,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
- int still_running = 0;
+ int still_running = 0, i;
u64 now;
long ret;
- struct kvm_vcpu *vcpu, *vnext;
+ struct kvm_vcpu *vcpu;
spin_lock(&vc->lock);
now = get_tb();
- list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, vcpu, vc) {
/* cancel pending dec exception if dec is positive */
if (now < vcpu->arch.dec_expires &&
kvmppc_core_pending_dec(vcpu))
@@ -2307,8 +2241,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
}
if (vc->n_runnable > 0 && vc->runner == NULL) {
/* make sure there's a candidate runner awake */
- vcpu = list_first_entry(&vc->runnable_threads,
- struct kvm_vcpu, arch.run_list);
+ i = -1;
+ vcpu = next_runnable_thread(vc, &i);
wake_up(&vcpu->arch.cpu_run);
}
}
@@ -2361,7 +2295,7 @@ static inline void kvmppc_set_host_core(int cpu)
*/
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu, *vnext;
+ struct kvm_vcpu *vcpu;
int i;
int srcu_idx;
struct core_info core_info;
@@ -2397,8 +2331,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if ((threads_per_core > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
- list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
@@ -2477,8 +2410,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
active |= 1 << thr;
list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
pvc->pcpu = pcpu + thr;
- list_for_each_entry(vcpu, &pvc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, vcpu, pvc) {
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
@@ -2604,34 +2536,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
finish_wait(&vcpu->arch.cpu_run, &wait);
}
+static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+ /* 10us base */
+ if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
+ vc->halt_poll_ns = 10000;
+ else
+ vc->halt_poll_ns *= halt_poll_ns_grow;
+
+ if (vc->halt_poll_ns > halt_poll_max_ns)
+ vc->halt_poll_ns = halt_poll_max_ns;
+}
+
+static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+ if (halt_poll_ns_shrink == 0)
+ vc->halt_poll_ns = 0;
+ else
+ vc->halt_poll_ns /= halt_poll_ns_shrink;
+}
+
+/* Check to see if any of the runnable vcpus on the vcore have pending
+ * exceptions or are no longer ceded
+ */
+static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ for_each_runnable_thread(i, vcpu, vc) {
+ if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
+ return 1;
+ }
+
+ return 0;
+}
+
/*
* All the vcpus in this vcore are idle, so wait for a decrementer
* or external interrupt to one of the vcpus. vc->lock is held.
*/
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu;
+ ktime_t cur, start_poll, start_wait;
int do_sleep = 1;
+ u64 block_ns;
DECLARE_SWAITQUEUE(wait);
- prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ /* Poll for pending exceptions and ceded state */
+ cur = start_poll = ktime_get();
+ if (vc->halt_poll_ns) {
+ ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
+ ++vc->runner->stat.halt_attempted_poll;
- /*
- * Check one last time for pending exceptions and ceded state after
- * we put ourselves on the wait queue
- */
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
- if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
- do_sleep = 0;
- break;
+ vc->vcore_state = VCORE_POLLING;
+ spin_unlock(&vc->lock);
+
+ do {
+ if (kvmppc_vcore_check_block(vc)) {
+ do_sleep = 0;
+ break;
+ }
+ cur = ktime_get();
+ } while (single_task_running() && ktime_before(cur, stop));
+
+ spin_lock(&vc->lock);
+ vc->vcore_state = VCORE_INACTIVE;
+
+ if (!do_sleep) {
+ ++vc->runner->stat.halt_successful_poll;
+ goto out;
}
}
- if (!do_sleep) {
+ prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+ if (kvmppc_vcore_check_block(vc)) {
finish_swait(&vc->wq, &wait);
- return;
+ do_sleep = 0;
+ /* If we polled, count this as a successful poll */
+ if (vc->halt_poll_ns)
+ ++vc->runner->stat.halt_successful_poll;
+ goto out;
}
+ start_wait = ktime_get();
+
vc->vcore_state = VCORE_SLEEPING;
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
@@ -2640,13 +2630,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
+ ++vc->runner->stat.halt_successful_wait;
+
+ cur = ktime_get();
+
+out:
+ block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
+
+ /* Attribute wait time */
+ if (do_sleep) {
+ vc->runner->stat.halt_wait_ns +=
+ ktime_to_ns(cur) - ktime_to_ns(start_wait);
+ /* Attribute failed poll time */
+ if (vc->halt_poll_ns)
+ vc->runner->stat.halt_poll_fail_ns +=
+ ktime_to_ns(start_wait) -
+ ktime_to_ns(start_poll);
+ } else {
+ /* Attribute successful poll time */
+ if (vc->halt_poll_ns)
+ vc->runner->stat.halt_poll_success_ns +=
+ ktime_to_ns(cur) -
+ ktime_to_ns(start_poll);
+ }
+
+ /* Adjust poll time */
+ if (halt_poll_max_ns) {
+ if (block_ns <= vc->halt_poll_ns)
+ ;
+ /* We slept and blocked for longer than the max halt time */
+ else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
+ shrink_halt_poll_ns(vc);
+ /* We slept and our poll time is too small */
+ else if (vc->halt_poll_ns < halt_poll_max_ns &&
+ block_ns < halt_poll_max_ns)
+ grow_halt_poll_ns(vc);
+ } else
+ vc->halt_poll_ns = 0;
+
+ trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
}
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
- int n_ceded;
+ int n_ceded, i;
struct kvmppc_vcore *vc;
- struct kvm_vcpu *v, *vn;
+ struct kvm_vcpu *v;
trace_kvmppc_run_vcpu_enter(vcpu);
@@ -2666,7 +2695,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.busy_preempt = TB_NIL;
- list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+ WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
++vc->n_runnable;
/*
@@ -2706,8 +2735,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
continue;
}
- list_for_each_entry_safe(v, vn, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, v, vc) {
kvmppc_core_prepare_to_enter(v);
if (signal_pending(v->arch.run_task)) {
kvmppc_remove_runnable(vc, v);
@@ -2720,7 +2748,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
break;
n_ceded = 0;
- list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+ for_each_runnable_thread(i, v, vc) {
if (!v->arch.pending_exceptions)
n_ceded += v->arch.ceded;
else
@@ -2759,8 +2787,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
/* Wake up some vcpu to run the core */
- v = list_first_entry(&vc->runnable_threads,
- struct kvm_vcpu, arch.run_list);
+ i = -1;
+ v = next_runnable_thread(vc, &i);
wake_up(&v->arch.cpu_run);
}
@@ -2818,7 +2846,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
r = kvmppc_book3s_hv_page_fault(run, vcpu,
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
- }
+ } else if (r == RESUME_PASSTHROUGH)
+ r = kvmppc_xics_rm_complete(vcpu, 0);
} while (is_kvmppc_resume_guest(r));
out:
@@ -3247,6 +3276,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvmppc_free_vcores(kvm);
kvmppc_free_hpt(kvm);
+
+ kvmppc_free_pimap(kvm);
}
/* We don't need to emulate any privileged instructions or dcbz */
@@ -3282,6 +3313,184 @@ static int kvmppc_core_check_processor_compat_hv(void)
return 0;
}
+#ifdef CONFIG_KVM_XICS
+
+void kvmppc_free_pimap(struct kvm *kvm)
+{
+ kfree(kvm->arch.pimap);
+}
+
+static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
+{
+ return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
+}
+
+static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+ struct irq_desc *desc;
+ struct kvmppc_irq_map *irq_map;
+ struct kvmppc_passthru_irqmap *pimap;
+ struct irq_chip *chip;
+ int i;
+
+ if (!kvm_irq_bypass)
+ return 1;
+
+ desc = irq_to_desc(host_irq);
+ if (!desc)
+ return -EIO;
+
+ mutex_lock(&kvm->lock);
+
+ pimap = kvm->arch.pimap;
+ if (pimap == NULL) {
+ /* First call, allocate structure to hold IRQ map */
+ pimap = kvmppc_alloc_pimap();
+ if (pimap == NULL) {
+ mutex_unlock(&kvm->lock);
+ return -ENOMEM;
+ }
+ kvm->arch.pimap = pimap;
+ }
+
+ /*
+ * For now, we only support interrupts for which the EOI operation
+ * is an OPAL call followed by a write to XIRR, since that's
+ * what our real-mode EOI code does.
+ */
+ chip = irq_data_get_irq_chip(&desc->irq_data);
+ if (!chip || !is_pnv_opal_msi(chip)) {
+ pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
+ host_irq, guest_gsi);
+ mutex_unlock(&kvm->lock);
+ return -ENOENT;
+ }
+
+ /*
+ * See if we already have an entry for this guest IRQ number.
+ * If it's mapped to a hardware IRQ number, that's an error,
+ * otherwise re-use this entry.
+ */
+ for (i = 0; i < pimap->n_mapped; i++) {
+ if (guest_gsi == pimap->mapped[i].v_hwirq) {
+ if (pimap->mapped[i].r_hwirq) {
+ mutex_unlock(&kvm->lock);
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
+ if (i == KVMPPC_PIRQ_MAPPED) {
+ mutex_unlock(&kvm->lock);
+ return -EAGAIN; /* table is full */
+ }
+
+ irq_map = &pimap->mapped[i];
+
+ irq_map->v_hwirq = guest_gsi;
+ irq_map->desc = desc;
+
+ /*
+ * Order the above two stores before the next to serialize with
+ * the KVM real mode handler.
+ */
+ smp_wmb();
+ irq_map->r_hwirq = desc->irq_data.hwirq;
+
+ if (i == pimap->n_mapped)
+ pimap->n_mapped++;
+
+ kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+
+ mutex_unlock(&kvm->lock);
+
+ return 0;
+}
+
+static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+ struct irq_desc *desc;
+ struct kvmppc_passthru_irqmap *pimap;
+ int i;
+
+ if (!kvm_irq_bypass)
+ return 0;
+
+ desc = irq_to_desc(host_irq);
+ if (!desc)
+ return -EIO;
+
+ mutex_lock(&kvm->lock);
+
+ if (kvm->arch.pimap == NULL) {
+ mutex_unlock(&kvm->lock);
+ return 0;
+ }
+ pimap = kvm->arch.pimap;
+
+ for (i = 0; i < pimap->n_mapped; i++) {
+ if (guest_gsi == pimap->mapped[i].v_hwirq)
+ break;
+ }
+
+ if (i == pimap->n_mapped) {
+ mutex_unlock(&kvm->lock);
+ return -ENODEV;
+ }
+
+ kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
+
+ /* invalidate the entry */
+ pimap->mapped[i].r_hwirq = 0;
+
+ /*
+ * We don't free this structure even when the count goes to
+ * zero. The structure is freed when we destroy the VM.
+ */
+
+ mutex_unlock(&kvm->lock);
+ return 0;
+}
+
+static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ int ret = 0;
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ irqfd->producer = prod;
+
+ ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+ if (ret)
+ pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
+ prod->irq, irqfd->gsi, ret);
+
+ return ret;
+}
+
+static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ int ret;
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ irqfd->producer = NULL;
+
+ /*
+ * When producer of consumer is unregistered, we change back to
+ * default external interrupt handling mode - KVM real mode
+ * will switch back to host.
+ */
+ ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+ if (ret)
+ pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
+ prod->irq, irqfd->gsi, ret);
+}
+#endif
+
static long kvm_arch_vm_ioctl_hv(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -3400,6 +3609,10 @@ static struct kvmppc_ops kvm_ops_hv = {
.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
.arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
.hcall_implemented = kvmppc_hcall_impl_hv,
+#ifdef CONFIG_KVM_XICS
+ .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
+ .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
+#endif
};
static int kvm_init_subcore_bitmap(void)