summaryrefslogtreecommitdiff
path: root/fs/eventpoll.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-04-07 14:11:54 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-04-07 14:11:54 -0700
commit63bef48fd6c9d3f1ba4f0e23b4da1e007db6a3c0 (patch)
treef27c1ea7686b2ee30eea6b973a430f1c102bb03f /fs/eventpoll.c
parent04de788e61a576820baf03ff8accc246ca146cb3 (diff)
parent1cd377baa91844b9f87a2b72eabf7ff783946b5e (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton: - a lot more of MM, quite a bit more yet to come: (memcg, pagemap, vmalloc, pagealloc, migration, thp, ksm, madvise, virtio, userfaultfd, memory-hotplug, shmem, rmap, zswap, zsmalloc, cleanups) - various other subsystems (procfs, misc, MAINTAINERS, bitops, lib, checkpatch, epoll, binfmt, kallsyms, reiserfs, kmod, gcov, kconfig, ubsan, fault-injection, ipc) * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (158 commits) ipc/shm.c: make compat_ksys_shmctl() static ipc/mqueue.c: fix a brace coding style issue lib/Kconfig.debug: fix a typo "capabilitiy" -> "capability" ubsan: include bug type in report header kasan: unset panic_on_warn before calling panic() ubsan: check panic_on_warn drivers/misc/lkdtm/bugs.c: add arithmetic overflow and array bounds checks ubsan: split "bounds" checker from other options ubsan: add trap instrumentation option init/Kconfig: clean up ANON_INODES and old IO schedulers options kernel/gcov/fs.c: replace zero-length array with flexible-array member gcov: gcc_3_4: replace zero-length array with flexible-array member gcov: gcc_4_7: replace zero-length array with flexible-array member kernel/kmod.c: fix a typo "assuems" -> "assumes" reiserfs: clean up several indentation issues kallsyms: unexport kallsyms_lookup_name() and kallsyms_on_each_symbol() samples/hw_breakpoint: drop use of kallsyms_lookup_name() samples/hw_breakpoint: drop HW_BREAKPOINT_R when reporting writes fs/binfmt_elf.c: don't free interpreter's ELF pheaders on common path fs/binfmt_elf.c: allocate less for static executable ...
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--fs/eventpoll.c64
1 files changed, 43 insertions, 21 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index eee3c92a9ebf..8c596641a72b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,13 +218,18 @@ struct eventpoll {
struct file *file;
/* used to optimize loop detection check */
- int visited;
struct list_head visited_list_link;
+ int visited;
#ifdef CONFIG_NET_RX_BUSY_POLL
/* used to track busy poll napi_id */
unsigned int napi_id;
#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /* tracks wakeup nests for lockdep validation */
+ u8 nests;
+#endif
};
/* Wait structure used by the poll hooks */
@@ -545,30 +550,47 @@ out_unlock:
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-static DEFINE_PER_CPU(int, wakeup_nest);
-
-static void ep_poll_safewake(wait_queue_head_t *wq)
+static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
{
+ struct eventpoll *ep_src;
unsigned long flags;
- int subclass;
+ u8 nests = 0;
- local_irq_save(flags);
- preempt_disable();
- subclass = __this_cpu_read(wakeup_nest);
- spin_lock_nested(&wq->lock, subclass + 1);
- __this_cpu_inc(wakeup_nest);
- wake_up_locked_poll(wq, POLLIN);
- __this_cpu_dec(wakeup_nest);
- spin_unlock(&wq->lock);
- local_irq_restore(flags);
- preempt_enable();
+ /*
+ * To set the subclass or nesting level for spin_lock_irqsave_nested()
+ * it might be natural to create a per-cpu nest count. However, since
+ * we can recurse on ep->poll_wait.lock, and a non-raw spinlock can
+ * schedule() in the -rt kernel, the per-cpu variable are no longer
+ * protected. Thus, we are introducing a per eventpoll nest field.
+ * If we are not being call from ep_poll_callback(), epi is NULL and
+ * we are at the first level of nesting, 0. Otherwise, we are being
+ * called from ep_poll_callback() and if a previous wakeup source is
+ * not an epoll file itself, we are at depth 1 since the wakeup source
+ * is depth 0. If the wakeup source is a previous epoll file in the
+ * wakeup chain then we use its nests value and record ours as
+ * nests + 1. The previous epoll file nests value is stable since its
+ * already holding its own poll_wait.lock.
+ */
+ if (epi) {
+ if ((is_file_epoll(epi->ffd.file))) {
+ ep_src = epi->ffd.file->private_data;
+ nests = ep_src->nests;
+ } else {
+ nests = 1;
+ }
+ }
+ spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
+ ep->nests = nests + 1;
+ wake_up_locked_poll(&ep->poll_wait, EPOLLIN);
+ ep->nests = 0;
+ spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
}
#else
-static void ep_poll_safewake(wait_queue_head_t *wq)
+static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
{
- wake_up_poll(wq, EPOLLIN);
+ wake_up_poll(&ep->poll_wait, EPOLLIN);
}
#endif
@@ -789,7 +811,7 @@ static void ep_free(struct eventpoll *ep)
/* We need to release all tasks waiting for these file */
if (waitqueue_active(&ep->poll_wait))
- ep_poll_safewake(&ep->poll_wait);
+ ep_poll_safewake(ep, NULL);
/*
* We need to lock this because we could be hit by
@@ -1258,7 +1280,7 @@ out_unlock:
/* We have to call this outside the lock */
if (pwake)
- ep_poll_safewake(&ep->poll_wait);
+ ep_poll_safewake(ep, epi);
if (!(epi->event.events & EPOLLEXCLUSIVE))
ewake = 1;
@@ -1562,7 +1584,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
/* We have to call this outside the lock */
if (pwake)
- ep_poll_safewake(&ep->poll_wait);
+ ep_poll_safewake(ep, NULL);
return 0;
@@ -1666,7 +1688,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
/* We have to call this outside the lock */
if (pwake)
- ep_poll_safewake(&ep->poll_wait);
+ ep_poll_safewake(ep, NULL);
return 0;
}