-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt |   8
-rw-r--r--  drivers/xen/events/events_2l.c                  |   7
-rw-r--r--  drivers/xen/events/events_base.c                | 189
-rw-r--r--  drivers/xen/events/events_fifo.c                |  30
-rw-r--r--  drivers/xen/events/events_internal.h            |  14
 5 files changed, 216 insertions(+), 32 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df..89d977f0b786 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5828,6 +5828,14 @@
improve timer resolution at the expense of processing
more timer interrupts.
+ xen.event_eoi_delay= [XEN]
+ How long (in jiffies) to delay EOI handling in case
+ of event storms. Default is 10.
+
+ xen.event_loop_timeout= [XEN]
+ Time in jiffies after which the event handling loop
+ should start to delay EOI handling. Default is 2.
+
nopv= [X86,XEN,KVM,HYPER_V,VMWARE]
Disables the PV optimizations forcing the guest to run
as generic guest with no PV drivers. Currently support
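
As a usage illustration: both knobs can be given on the kernel command
line, e.g. to double the two defaults:

	xen.event_loop_timeout=4 xen.event_eoi_delay=20

Since the parameters are registered with mode 0644 in events_base.c
below, they should also be adjustable at runtime via
/sys/module/xen/parameters/event_loop_timeout and
/sys/module/xen/parameters/event_eoi_delay, assuming the usual sysfs
layout for built-in module parameters.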
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index e1af5e093ff4..fe5ad0e89cd8 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -161,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
*/
-static void evtchn_2l_handle_events(unsigned cpu)
+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
{
int irq;
xen_ulong_t pending_words;
@@ -242,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
/* Process port. */
port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
- irq = get_evtchn_to_irq(port);
-
- if (irq != -1)
- generic_handle_irq(irq);
+ handle_irq_for_port(port, ctrl);
bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 9cbfea5e9a08..cde096a6f11d 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -35,6 +35,8 @@
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/cpuhotplug.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
#ifdef CONFIG_X86
#include <asm/desc.h>
@@ -65,6 +67,15 @@
#include "events_internal.h"
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static uint __read_mostly event_loop_timeout = 2;
+module_param(event_loop_timeout, uint, 0644);
+
+static uint __read_mostly event_eoi_delay = 10;
+module_param(event_eoi_delay, uint, 0644);
+
const struct evtchn_ops *evtchn_ops;
/*
@@ -88,6 +99,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
* irq_mapping_update_lock
* evtchn_rwlock
* IRQ-desc lock
+ * percpu eoi_list_lock
*/
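
The new per-cpu eoi_list_lock slots in as the innermost lock. A minimal
nesting sketch, matching what xen_irq_lateeoi_worker() below actually
does (the list lock is taken strictly under the read side of
evtchn_rwlock):

	read_lock_irqsave(&evtchn_rwlock, flags);
	spin_lock(&eoi->eoi_list_lock);
	/* ... pick expired entries off eoi->eoi_list ... */
	spin_unlock(&eoi->eoi_list_lock);
	read_unlock_irqrestore(&evtchn_rwlock, flags);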
static LIST_HEAD(xen_irq_list_head);
@@ -120,6 +132,8 @@ static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
+static DEFINE_PER_CPU(unsigned int, irq_epoch);
+
static void clear_evtchn_to_irq_row(unsigned row)
{
unsigned col;
@@ -399,17 +413,120 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
+struct lateeoi_work {
+ struct delayed_work delayed;
+ spinlock_t eoi_list_lock;
+ struct list_head eoi_list;
+};
+
+static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
+
+static void lateeoi_list_del(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+ list_del_init(&info->eoi_list);
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void lateeoi_list_add(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ struct irq_info *elem;
+ u64 now = get_jiffies_64();
+ unsigned long delay;
+ unsigned long flags;
+
+ if (now < info->eoi_time)
+ delay = info->eoi_time - now;
+ else
+ delay = 1;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+
+ if (list_empty(&eoi->eoi_list)) {
+ list_add(&info->eoi_list, &eoi->eoi_list);
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, delay);
+ } else {
+ list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
+ if (elem->eoi_time <= info->eoi_time)
+ break;
+ }
+ list_add(&info->eoi_list, &elem->eoi_list);
+ }
+
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
static void xen_irq_lateeoi_locked(struct irq_info *info)
{
evtchn_port_t evtchn;
+ unsigned int cpu;
evtchn = info->evtchn;
- if (!VALID_EVTCHN(evtchn))
+ if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
return;
+ cpu = info->eoi_cpu;
+ if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) {
+ lateeoi_list_add(info);
+ return;
+ }
+
+ info->eoi_time = 0;
unmask_evtchn(evtchn);
}
+static void xen_irq_lateeoi_worker(struct work_struct *work)
+{
+ struct lateeoi_work *eoi;
+ struct irq_info *info;
+ u64 now = get_jiffies_64();
+ unsigned long flags;
+
+ eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ while (true) {
+ spin_lock(&eoi->eoi_list_lock);
+
+ info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+ eoi_list);
+
+ if (info == NULL || now < info->eoi_time) {
+ spin_unlock(&eoi->eoi_list_lock);
+ break;
+ }
+
+ list_del_init(&info->eoi_list);
+
+ spin_unlock(&eoi->eoi_list_lock);
+
+ info->eoi_time = 0;
+
+ xen_irq_lateeoi_locked(info);
+ }
+
+ if (info)
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, info->eoi_time - now);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+
+static void xen_cpu_init_eoi(unsigned int cpu)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
+
+ INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
+ spin_lock_init(&eoi->eoi_list_lock);
+ INIT_LIST_HEAD(&eoi->eoi_list);
+}
+
void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
{
struct irq_info *info;
@@ -429,6 +546,7 @@ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
+
#ifdef CONFIG_SMP
/* By default all event channels notify CPU#0. */
cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
@@ -443,6 +561,7 @@ static void xen_irq_init(unsigned irq)
set_info_for_irq(irq, info);
+ INIT_LIST_HEAD(&info->eoi_list);
list_add_tail(&info->list, &xen_irq_list_head);
}
@@ -498,6 +617,9 @@ static void xen_free_irq(unsigned irq)
write_lock_irqsave(&evtchn_rwlock, flags);
+ if (!list_empty(&info->eoi_list))
+ lateeoi_list_del(info);
+
list_del(&info->list);
set_info_for_irq(irq, NULL);
@@ -1358,17 +1480,66 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
notify_remote_via_irq(irq);
}
+struct evtchn_loop_ctrl {
+ ktime_t timeout;
+ unsigned count;
+ bool defer_eoi;
+};
+
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+{
+ int irq;
+ struct irq_info *info;
+
+ irq = get_evtchn_to_irq(port);
+ if (irq == -1)
+ return;
+
+ /*
+ * Check for timeout every 256 events.
+ * We set the timeout value only after the first 256 events in
+ * order not to penalize the common case of few loop iterations.
+ * The 256 is basically an arbitrary value.
+ *
+ * If we hit the timeout, we need to defer all further EOIs to
+ * ensure we leave the event handling loop sooner rather than
+ * later.
+ */
+ if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
+ ktime_t kt = ktime_get();
+
+ if (!ctrl->timeout) {
+ kt = ktime_add_ms(kt,
+ jiffies_to_msecs(event_loop_timeout));
+ ctrl->timeout = kt;
+ } else if (kt > ctrl->timeout) {
+ ctrl->defer_eoi = true;
+ }
+ }
+
+ info = info_for_irq(irq);
+
+ if (ctrl->defer_eoi) {
+ info->eoi_cpu = smp_processor_id();
+ info->irq_epoch = __this_cpu_read(irq_epoch);
+ info->eoi_time = get_jiffies_64() + event_eoi_delay;
+ }
+
+ generic_handle_irq(irq);
+}
+
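The "!(++ctrl->count & 0xff)" test is true exactly once every 256
increments, so the comparatively expensive ktime_get() is amortized,
and the deadline is armed lazily on the first such check rather than at
loop entry; with the default event_loop_timeout of 2 jiffies that is an
8 ms budget at HZ=250. A standalone user-space sketch of the same
pattern (illustration only; the names and the CLOCK_MONOTONIC time
source stand in for the kernel's ktime machinery):

	/* Sketch of the amortized timeout check (user-space analogue). */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <time.h>

	struct loop_ctrl {
		int64_t timeout;	/* 0 until armed after the first 256 events */
		unsigned int count;
		bool defer_eoi;
	};

	static int64_t now_ns(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
	}

	static void account_event(struct loop_ctrl *ctrl, int64_t budget_ns)
	{
		/* True once every 256 calls: the clock is read rarely. */
		if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
			int64_t t = now_ns();

			if (!ctrl->timeout)
				ctrl->timeout = t + budget_ns;	/* arm lazily */
			else if (t > ctrl->timeout)
				ctrl->defer_eoi = true;	/* storm detected */
		}
	}

	int main(void)
	{
		struct loop_ctrl ctrl = { 0 };
		unsigned long i;

		/* 8 ms budget, i.e. event_loop_timeout=2 jiffies at HZ=250. */
		for (i = 0; !ctrl.defer_eoi; i++)
			account_event(&ctrl, 8000000);

		printf("deferring EOIs after %lu events\n", i);
		return 0;
	}
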
static void __xen_evtchn_do_upcall(void)
{
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
int cpu = smp_processor_id();
+ struct evtchn_loop_ctrl ctrl = { 0 };
read_lock(&evtchn_rwlock);
do {
vcpu_info->evtchn_upcall_pending = 0;
- xen_evtchn_handle_events(cpu);
+ xen_evtchn_handle_events(cpu, &ctrl);
BUG_ON(!irqs_disabled());
@@ -1377,6 +1548,13 @@ static void __xen_evtchn_do_upcall(void)
} while (vcpu_info->evtchn_upcall_pending);
read_unlock(&evtchn_rwlock);
+
+ /*
+ * Increment irq_epoch only now, so that EOIs are deferred only
+ * for xen_irq_lateeoi() invocations made from inside the loop
+ * above.
+ */
+ __this_cpu_inc(irq_epoch);
}
void xen_evtchn_do_upcall(struct pt_regs *regs)
@@ -1825,9 +2003,6 @@ void xen_setup_callback_vector(void) {}
static inline void xen_alloc_callback_vector(void) {}
#endif
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
-
static bool fifo_events = true;
module_param(fifo_events, bool, 0);
@@ -1835,6 +2010,8 @@ static int xen_evtchn_cpu_prepare(unsigned int cpu)
{
int ret = 0;
+ xen_cpu_init_eoi(cpu);
+
if (evtchn_ops->percpu_init)
ret = evtchn_ops->percpu_init(cpu);
@@ -1861,6 +2038,8 @@ void __init xen_init_IRQ(void)
if (ret < 0)
xen_evtchn_2l_init();
+ xen_cpu_init_eoi(smp_processor_id());
+
cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
"xen/evtchn:prepare",
xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
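
Note how the deferral is scoped by irq_epoch: handle_irq_for_port()
stamps the event with the current per-cpu epoch, and
__xen_evtchn_do_upcall() increments that epoch only after its loop has
finished, so xen_irq_lateeoi_locked() queues the EOI on the per-cpu
list only when called from inside the very upcall loop that detected
the storm; an EOI issued later (different epoch) unmasks the channel
immediately. A minimal consumer sketch, assuming the
bind_evtchn_to_irqhandler_lateeoi() helper and the
XEN_EOI_FLAG_SPURIOUS flag introduced earlier in this series (the
my_dev_* helpers are hypothetical):

	static irqreturn_t my_evtchn_handler(int irq, void *dev_id)
	{
		struct my_dev *dev = dev_id;		/* hypothetical device */
		unsigned int eoi_flags = 0;

		if (my_dev_work_pending(dev))		/* hypothetical check */
			my_dev_process(dev);		/* hypothetical work */
		else
			eoi_flags |= XEN_EOI_FLAG_SPURIOUS;

		/* Unmasks now, or lands on the per-cpu lateeoi list when
		 * EOIs are being deferred. */
		xen_irq_lateeoi(irq, eoi_flags);
		return IRQ_HANDLED;
	}

	/* The lateeoi variant keeps the channel masked until the explicit
	 * xen_irq_lateeoi() call above. */
	irq = bind_evtchn_to_irqhandler_lateeoi(evtchn, my_evtchn_handler,
						0, "my_dev", dev);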
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index 40e4ca1685aa..6085a808da95 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -275,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word)
return w & EVTCHN_FIFO_LINK_MASK;
}
-static void handle_irq_for_port(evtchn_port_t port)
-{
- int irq;
-
- irq = get_evtchn_to_irq(port);
- if (irq != -1)
- generic_handle_irq(irq);
-}
-
-static void consume_one_event(unsigned cpu,
+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
struct evtchn_fifo_control_block *control_block,
- unsigned priority, unsigned long *ready,
- bool drop)
+ unsigned priority, unsigned long *ready)
{
struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
uint32_t head;
@@ -320,16 +310,17 @@ static void consume_one_event(unsigned cpu,
clear_bit(priority, ready);
if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
- if (unlikely(drop))
+ if (unlikely(!ctrl))
pr_warn("Dropping pending event for port %u\n", port);
else
- handle_irq_for_port(port);
+ handle_irq_for_port(port, ctrl);
}
q->head[priority] = head;
}
-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
+static void __evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
struct evtchn_fifo_control_block *control_block;
unsigned long ready;
@@ -341,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
while (ready) {
q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
- consume_one_event(cpu, control_block, q, &ready, drop);
+ consume_one_event(cpu, ctrl, control_block, q, &ready);
ready |= xchg(&control_block->ready, 0);
}
}
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- __evtchn_fifo_handle_events(cpu, false);
+ __evtchn_fifo_handle_events(cpu, ctrl);
}
static void evtchn_fifo_resume(void)
@@ -416,7 +408,7 @@ static int evtchn_fifo_percpu_init(unsigned int cpu)
static int evtchn_fifo_percpu_deinit(unsigned int cpu)
{
- __evtchn_fifo_handle_events(cpu, true);
+ __evtchn_fifo_handle_events(cpu, NULL);
return 0;
}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
index 558abea19d0d..aac05cf52ced 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -30,11 +30,15 @@ enum xen_irq_type {
*/
struct irq_info {
struct list_head list;
+ struct list_head eoi_list;
int refcnt;
enum xen_irq_type type; /* type */
unsigned irq;
evtchn_port_t evtchn; /* event channel */
unsigned short cpu; /* cpu bound */
+ unsigned short eoi_cpu; /* EOI must happen on this cpu */
+ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
+ u64 eoi_time; /* Time in jiffies when to EOI. */
union {
unsigned short virq;
@@ -53,6 +57,8 @@ struct irq_info {
#define PIRQ_SHAREABLE (1 << 1)
#define PIRQ_MSI_GROUP (1 << 2)
+struct evtchn_loop_ctrl;
+
struct evtchn_ops {
unsigned (*max_channels)(void);
unsigned (*nr_channels)(void);
@@ -67,7 +73,7 @@ struct evtchn_ops {
void (*mask)(evtchn_port_t port);
void (*unmask)(evtchn_port_t port);
- void (*handle_events)(unsigned cpu);
+ void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
void (*resume)(void);
int (*percpu_init)(unsigned int cpu);
@@ -78,6 +84,7 @@ extern const struct evtchn_ops *evtchn_ops;
extern int **evtchn_to_irq;
int get_evtchn_to_irq(evtchn_port_t evtchn);
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
struct irq_info *info_for_irq(unsigned irq);
unsigned cpu_from_irq(unsigned irq);
@@ -135,9 +142,10 @@ static inline void unmask_evtchn(evtchn_port_t port)
return evtchn_ops->unmask(port);
}
-static inline void xen_evtchn_handle_events(unsigned cpu)
+static inline void xen_evtchn_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- return evtchn_ops->handle_events(cpu);
+ return evtchn_ops->handle_events(cpu, ctrl);
}
static inline void xen_evtchn_resume(void)