summaryrefslogtreecommitdiff
path: root/mm/filemap.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-09-22 16:45:34 -0700
committerDavid S. Miller <davem@davemloft.net>2020-09-22 16:45:34 -0700
commit3ab0a7a0c349a1d7beb2bb371a62669d1528269d (patch)
treed2ae17c3bfc829ce0c747ad97021cd4bc8fb11dc /mm/filemap.c
parent92ec804f3dbf0d986f8e10850bfff14f316d7aaf (diff)
parent805c6d3c19210c90c109107d189744e960eae025 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Two minor conflicts: 1) net/ipv4/route.c, adding a new local variable while moving another local variable and removing it's initial assignment. 2) drivers/net/dsa/microchip/ksz9477.c, overlapping changes. One pretty prints the port mode differently, whilst another changes the driver to try and obtain the port mode from the port node rather than the switch node. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c160
1 files changed, 129 insertions, 31 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 054d93a86f8a..8c8f2deee4e3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -988,9 +988,43 @@ void __init pagecache_init(void)
page_writeback_init();
}
+/*
+ * The page wait code treats the "wait->flags" somewhat unusually, because
+ * we have multiple different kinds of waits, not just the usual "exclusive"
+ * one.
+ *
+ * We have:
+ *
+ * (a) no special bits set:
+ *
+ * We're just waiting for the bit to be released, and when a waker
+ * calls the wakeup function, we set WQ_FLAG_WOKEN and wake it up,
+ * and remove it from the wait queue.
+ *
+ * Simple and straightforward.
+ *
+ * (b) WQ_FLAG_EXCLUSIVE:
+ *
+ * The waiter is waiting to get the lock, and only one waiter should
+ * be woken up to avoid any thundering herd behavior. We'll set the
+ * WQ_FLAG_WOKEN bit, wake it up, and remove it from the wait queue.
+ *
+ * This is the traditional exclusive wait.
+ *
+ * (c) WQ_FLAG_EXCLUSIVE | WQ_FLAG_CUSTOM:
+ *
+ * The waiter is waiting to get the bit, and additionally wants the
+ * lock to be transferred to it for fair lock behavior. If the lock
+ * cannot be taken, we stop walking the wait queue without waking
+ * the waiter.
+ *
+ * This is the "fair lock handoff" case, and in addition to setting
+ * WQ_FLAG_WOKEN, we set WQ_FLAG_DONE to let the waiter easily see
+ * that it now has the lock.
+ */
static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
{
- int ret;
+ unsigned int flags;
struct wait_page_key *key = arg;
struct wait_page_queue *wait_page
= container_of(wait, struct wait_page_queue, wait);
@@ -999,35 +1033,44 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
return 0;
/*
- * If it's an exclusive wait, we get the bit for it, and
- * stop walking if we can't.
- *
- * If it's a non-exclusive wait, then the fact that this
- * wake function was called means that the bit already
- * was cleared, and we don't care if somebody then
- * re-took it.
+ * If it's a lock handoff wait, we get the bit for it, and
+ * stop walking (and do not wake it up) if we can't.
*/
- ret = 0;
- if (wait->flags & WQ_FLAG_EXCLUSIVE) {
- if (test_and_set_bit(key->bit_nr, &key->page->flags))
+ flags = wait->flags;
+ if (flags & WQ_FLAG_EXCLUSIVE) {
+ if (test_bit(key->bit_nr, &key->page->flags))
return -1;
- ret = 1;
+ if (flags & WQ_FLAG_CUSTOM) {
+ if (test_and_set_bit(key->bit_nr, &key->page->flags))
+ return -1;
+ flags |= WQ_FLAG_DONE;
+ }
}
- wait->flags |= WQ_FLAG_WOKEN;
+ /*
+ * We are holding the wait-queue lock, but the waiter that
+ * is waiting for this will be checking the flags without
+ * any locking.
+ *
+ * So update the flags atomically, and wake up the waiter
+ * afterwards to avoid any races. This store-release pairs
+ * with the load-acquire in wait_on_page_bit_common().
+ */
+ smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
wake_up_state(wait->private, mode);
/*
* Ok, we have successfully done what we're waiting for,
* and we can unconditionally remove the wait entry.
*
- * Note that this has to be the absolute last thing we do,
- * since after list_del_init(&wait->entry) the wait entry
+ * Note that this pairs with the "finish_wait()" in the
+ * waiter, and has to be the absolute last thing we do.
+ * After this list_del_init(&wait->entry) the wait entry
* might be de-allocated and the process might even have
* exited.
*/
list_del_init_careful(&wait->entry);
- return ret;
+ return (flags & WQ_FLAG_EXCLUSIVE) != 0;
}
static void wake_up_page_bit(struct page *page, int bit_nr)
@@ -1107,8 +1150,8 @@ enum behavior {
};
/*
- * Attempt to check (or get) the page bit, and mark the
- * waiter woken if successful.
+ * Attempt to check (or get) the page bit, and mark us done
+ * if successful.
*/
static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
struct wait_queue_entry *wait)
@@ -1119,13 +1162,17 @@ static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
} else if (test_bit(bit_nr, &page->flags))
return false;
- wait->flags |= WQ_FLAG_WOKEN;
+ wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
return true;
}
+/* How many times do we accept lock stealing from under a waiter? */
+int sysctl_page_lock_unfairness = 5;
+
static inline int wait_on_page_bit_common(wait_queue_head_t *q,
struct page *page, int bit_nr, int state, enum behavior behavior)
{
+ int unfairness = sysctl_page_lock_unfairness;
struct wait_page_queue wait_page;
wait_queue_entry_t *wait = &wait_page.wait;
bool thrashing = false;
@@ -1143,11 +1190,18 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
}
init_wait(wait);
- wait->flags = behavior == EXCLUSIVE ? WQ_FLAG_EXCLUSIVE : 0;
wait->func = wake_page_function;
wait_page.page = page;
wait_page.bit_nr = bit_nr;
+repeat:
+ wait->flags = 0;
+ if (behavior == EXCLUSIVE) {
+ wait->flags = WQ_FLAG_EXCLUSIVE;
+ if (--unfairness < 0)
+ wait->flags |= WQ_FLAG_CUSTOM;
+ }
+
/*
* Do one last check whether we can get the
* page bit synchronously.
@@ -1170,27 +1224,63 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
/*
* From now on, all the logic will be based on
- * the WQ_FLAG_WOKEN flag, and the and the page
- * bit testing (and setting) will be - or has
- * already been - done by the wake function.
+ * the WQ_FLAG_WOKEN and WQ_FLAG_DONE flag, to
+ * see whether the page bit testing has already
+ * been done by the wake function.
*
* We can drop our reference to the page.
*/
if (behavior == DROP)
put_page(page);
+ /*
+ * Note that until the "finish_wait()", or until
+ * we see the WQ_FLAG_WOKEN flag, we need to
+ * be very careful with the 'wait->flags', because
+ * we may race with a waker that sets them.
+ */
for (;;) {
+ unsigned int flags;
+
set_current_state(state);
- if (signal_pending_state(state, current))
+ /* Loop until we've been woken or interrupted */
+ flags = smp_load_acquire(&wait->flags);
+ if (!(flags & WQ_FLAG_WOKEN)) {
+ if (signal_pending_state(state, current))
+ break;
+
+ io_schedule();
+ continue;
+ }
+
+ /* If we were non-exclusive, we're done */
+ if (behavior != EXCLUSIVE)
break;
- if (wait->flags & WQ_FLAG_WOKEN)
+ /* If the waker got the lock for us, we're done */
+ if (flags & WQ_FLAG_DONE)
break;
- io_schedule();
+ /*
+ * Otherwise, if we're getting the lock, we need to
+ * try to get it ourselves.
+ *
+ * And if that fails, we'll have to retry this all.
+ */
+ if (unlikely(test_and_set_bit(bit_nr, &page->flags)))
+ goto repeat;
+
+ wait->flags |= WQ_FLAG_DONE;
+ break;
}
+ /*
+ * If a signal happened, this 'finish_wait()' may remove the last
+ * waiter from the wait-queues, but the PageWaiters bit will remain
+ * set. That's ok. The next wakeup will take care of it, and trying
+ * to do it here would be difficult and prone to races.
+ */
finish_wait(q, wait);
if (thrashing) {
@@ -1200,12 +1290,20 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
}
/*
- * A signal could leave PageWaiters set. Clearing it here if
- * !waitqueue_active would be possible (by open-coding finish_wait),
- * but still fail to catch it in the case of wait hash collision. We
- * already can fail to clear wait hash collision cases, so don't
- * bother with signals either.
+ * NOTE! The wait->flags weren't stable until we've done the
+ * 'finish_wait()', and we could have exited the loop above due
+ * to a signal, and had a wakeup event happen after the signal
+ * test but before the 'finish_wait()'.
+ *
+ * So only after the finish_wait() can we reliably determine
+ * if we got woken up or not, so we can now figure out the final
+ * return value based on that state without races.
+ *
+ * Also note that WQ_FLAG_WOKEN is sufficient for a non-exclusive
+ * waiter, but an exclusive one requires WQ_FLAG_DONE.
*/
+ if (behavior == EXCLUSIVE)
+ return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR;
return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
}