summaryrefslogtreecommitdiff
path: root/mm/memfd.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-28 11:35:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-28 11:35:40 -0700
commitdad4f140edaa3f6bb452b6913d41af1ffd672e45 (patch)
tree1c0ebdcdfcdfb4ec9af7810c5ad9bae0f791ff5c /mm/memfd.c
parent69d5b97c597307773fe6c59775a5d5a88bb7e6b3 (diff)
parent3a08cd52c37c793ffc199f6fc2ecfc368e284b2d (diff)
Merge branch 'xarray' of git://git.infradead.org/users/willy/linux-dax
Pull XArray conversion from Matthew Wilcox: "The XArray provides an improved interface to the radix tree data structure, providing locking as part of the API, specifying GFP flags at allocation time, eliminating preloading, less re-walking the tree, more efficient iterations and not exposing RCU-protected pointers to its users. This patch set 1. Introduces the XArray implementation 2. Converts the pagecache to use it 3. Converts memremap to use it The page cache is the most complex and important user of the radix tree, so converting it was most important. Converting the memremap code removes the only other user of the multiorder code, which allows us to remove the radix tree code that supported it. I have 40+ followup patches to convert many other users of the radix tree over to the XArray, but I'd like to get this part in first. The other conversions haven't been in linux-next and aren't suitable for applying yet, but you can see them in the xarray-conv branch if you're interested" * 'xarray' of git://git.infradead.org/users/willy/linux-dax: (90 commits) radix tree: Remove multiorder support radix tree test: Convert multiorder tests to XArray radix tree tests: Convert item_delete_rcu to XArray radix tree tests: Convert item_kill_tree to XArray radix tree tests: Move item_insert_order radix tree test suite: Remove multiorder benchmarking radix tree test suite: Remove __item_insert memremap: Convert to XArray xarray: Add range store functionality xarray: Move multiorder_check to in-kernel tests xarray: Move multiorder_shrink to kernel tests xarray: Move multiorder account test in-kernel radix tree test suite: Convert iteration test to XArray radix tree test suite: Convert tag_tagged_items to XArray radix tree: Remove radix_tree_clear_tags radix tree: Remove radix_tree_maybe_preload_order radix tree: Remove split/join code radix tree: Remove radix_tree_update_node_t page cache: Finish XArray conversion dax: Convert page fault handlers to XArray ...
Diffstat (limited to 'mm/memfd.c')
-rw-r--r--mm/memfd.c105
1 files changed, 43 insertions, 62 deletions
diff --git a/mm/memfd.c b/mm/memfd.c
index 2bb5e257080e..97264c79d2cd 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -21,44 +21,36 @@
#include <uapi/linux/memfd.h>
/*
- * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
+ * We need a tag: a new tag would expand every xa_node by 8 bytes,
* so reuse a tag which we firmly believe is never set or cleared on tmpfs
* or hugetlbfs because they are memory only filesystems.
*/
#define MEMFD_TAG_PINNED PAGECACHE_TAG_TOWRITE
#define LAST_SCAN 4 /* about 150ms max */
-static void memfd_tag_pins(struct address_space *mapping)
+static void memfd_tag_pins(struct xa_state *xas)
{
- struct radix_tree_iter iter;
- void __rcu **slot;
- pgoff_t start;
struct page *page;
+ unsigned int tagged = 0;
lru_add_drain();
- start = 0;
- rcu_read_lock();
-
- radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
- page = radix_tree_deref_slot(slot);
- if (!page || radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- } else if (page_count(page) - page_mapcount(page) > 1) {
- xa_lock_irq(&mapping->i_pages);
- radix_tree_tag_set(&mapping->i_pages, iter.index,
- MEMFD_TAG_PINNED);
- xa_unlock_irq(&mapping->i_pages);
- }
- if (need_resched()) {
- slot = radix_tree_iter_resume(slot, &iter);
- cond_resched_rcu();
- }
+ xas_lock_irq(xas);
+ xas_for_each(xas, page, ULONG_MAX) {
+ if (xa_is_value(page))
+ continue;
+ if (page_count(page) - page_mapcount(page) > 1)
+ xas_set_mark(xas, MEMFD_TAG_PINNED);
+
+ if (++tagged % XA_CHECK_SCHED)
+ continue;
+
+ xas_pause(xas);
+ xas_unlock_irq(xas);
+ cond_resched();
+ xas_lock_irq(xas);
}
- rcu_read_unlock();
+ xas_unlock_irq(xas);
}
/*
@@ -72,17 +64,17 @@ static void memfd_tag_pins(struct address_space *mapping)
*/
static int memfd_wait_for_pins(struct address_space *mapping)
{
- struct radix_tree_iter iter;
- void __rcu **slot;
- pgoff_t start;
+ XA_STATE(xas, &mapping->i_pages, 0);
struct page *page;
int error, scan;
- memfd_tag_pins(mapping);
+ memfd_tag_pins(&xas);
error = 0;
for (scan = 0; scan <= LAST_SCAN; scan++) {
- if (!radix_tree_tagged(&mapping->i_pages, MEMFD_TAG_PINNED))
+ unsigned int tagged = 0;
+
+ if (!xas_marked(&xas, MEMFD_TAG_PINNED))
break;
if (!scan)
@@ -90,45 +82,34 @@ static int memfd_wait_for_pins(struct address_space *mapping)
else if (schedule_timeout_killable((HZ << scan) / 200))
scan = LAST_SCAN;
- start = 0;
- rcu_read_lock();
- radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
- start, MEMFD_TAG_PINNED) {
-
- page = radix_tree_deref_slot(slot);
- if (radix_tree_exception(page)) {
- if (radix_tree_deref_retry(page)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
-
- page = NULL;
- }
-
- if (page &&
- page_count(page) - page_mapcount(page) != 1) {
- if (scan < LAST_SCAN)
- goto continue_resched;
-
+ xas_set(&xas, 0);
+ xas_lock_irq(&xas);
+ xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
+ bool clear = true;
+ if (xa_is_value(page))
+ continue;
+ if (page_count(page) - page_mapcount(page) != 1) {
/*
* On the last scan, we clean up all those tags
* we inserted; but make a note that we still
* found pages pinned.
*/
- error = -EBUSY;
+ if (scan == LAST_SCAN)
+ error = -EBUSY;
+ else
+ clear = false;
}
+ if (clear)
+ xas_clear_mark(&xas, MEMFD_TAG_PINNED);
+ if (++tagged % XA_CHECK_SCHED)
+ continue;
- xa_lock_irq(&mapping->i_pages);
- radix_tree_tag_clear(&mapping->i_pages,
- iter.index, MEMFD_TAG_PINNED);
- xa_unlock_irq(&mapping->i_pages);
-continue_resched:
- if (need_resched()) {
- slot = radix_tree_iter_resume(slot, &iter);
- cond_resched_rcu();
- }
+ xas_pause(&xas);
+ xas_unlock_irq(&xas);
+ cond_resched();
+ xas_lock_irq(&xas);
}
- rcu_read_unlock();
+ xas_unlock_irq(&xas);
}
return error;