Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig            9
-rw-r--r--   mm/Makefile           1
-rw-r--r--   mm/compaction.c      48
-rw-r--r--   mm/gup_benchmark.c  100
-rw-r--r--   mm/internal.h         1
-rw-r--r--   mm/mmap.c            15
-rw-r--r--   mm/page_alloc.c       1
-rw-r--r--   mm/shmem.c            2
-rw-r--r--   mm/z3fold.c          10
9 files changed, 170 insertions, 17 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 9c4bdddd80c2..03ff7703d322 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -756,3 +756,12 @@ config PERCPU_STATS
This feature collects and exposes statistics via debugfs. The
information includes global and per chunk statistics, which can
be used to help understand percpu memory usage.
+
+config GUP_BENCHMARK
+ bool "Enable infrastructure for get_user_pages_fast() benchmarking"
+ default n
+ help
+ Provides /sys/kernel/debug/gup_benchmark that helps with testing
+ performance of get_user_pages_fast().
+
+ See tools/testing/selftests/vm/gup_benchmark.c
diff --git a/mm/Makefile b/mm/Makefile
index e7ebd176fb93..e669f02c5a54 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -80,6 +80,7 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
+obj-$(CONFIG_GUP_BENCHMARK) += gup_benchmark.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 85395dc6eb13..10cd757f1006 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -219,6 +219,24 @@ static void reset_cached_positions(struct zone *zone)
}
/*
+ * Compound pages of >= pageblock_order should consistently be skipped until
+ * released. It is always pointless to compact pages of such order (if they are
+ * migratable), and the pageblocks they occupy cannot contain any free pages.
+ */
+static bool pageblock_skip_persistent(struct page *page)
+{
+ if (!PageCompound(page))
+ return false;
+
+ page = compound_head(page);
+
+ if (compound_order(page) >= pageblock_order)
+ return true;
+
+ return false;
+}
+
+/*
* This function is called to clear all cached information on pageblocks that
* should be skipped for page isolation when the migrate and free page scanner
* meet.
@@ -242,6 +260,8 @@ static void __reset_isolation_suitable(struct zone *zone)
continue;
if (zone != page_zone(page))
continue;
+ if (pageblock_skip_persistent(page))
+ continue;
clear_pageblock_skip(page);
}
@@ -275,7 +295,7 @@ static void update_pageblock_skip(struct compact_control *cc,
struct zone *zone = cc->zone;
unsigned long pfn;
- if (cc->ignore_skip_hint)
+ if (cc->no_set_skip_hint)
return;
if (!page)
@@ -307,7 +327,12 @@ static inline bool isolation_suitable(struct compact_control *cc,
return true;
}
-static void update_pageblock_skip(struct compact_control *cc,
+static inline bool pageblock_skip_persistent(struct page *page)
+{
+ return false;
+}
+
+static inline void update_pageblock_skip(struct compact_control *cc,
struct page *page, unsigned long nr_isolated,
bool migrate_scanner)
{
@@ -449,13 +474,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
* and the only danger is skipping too much.
*/
if (PageCompound(page)) {
- unsigned int comp_order = compound_order(page);
+ const unsigned int order = compound_order(page);
- if (likely(comp_order < MAX_ORDER)) {
- blockpfn += (1UL << comp_order) - 1;
- cursor += (1UL << comp_order) - 1;
+ if (likely(order < MAX_ORDER)) {
+ blockpfn += (1UL << order) - 1;
+ cursor += (1UL << order) - 1;
}
-
goto isolate_fail;
}
@@ -772,11 +796,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* danger is skipping too much.
*/
if (PageCompound(page)) {
- unsigned int comp_order = compound_order(page);
-
- if (likely(comp_order < MAX_ORDER))
- low_pfn += (1UL << comp_order) - 1;
+ const unsigned int order = compound_order(page);
+ if (likely(order < MAX_ORDER))
+ low_pfn += (1UL << order) - 1;
goto isolate_fail;
}
@@ -1928,9 +1951,8 @@ static void kcompactd_do_work(pg_data_t *pgdat)
.total_free_scanned = 0,
.classzone_idx = pgdat->kcompactd_classzone_idx,
.mode = MIGRATE_SYNC_LIGHT,
- .ignore_skip_hint = true,
+ .ignore_skip_hint = false,
.gfp_mask = GFP_KERNEL,
-
};
trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
cc.classzone_idx);
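
The net effect of the compaction hunks above, together with the new no_set_skip_hint field added to struct compact_control further down, is that the old ignore_skip_hint now only controls the read side (scan pageblocks even if they are marked skip), while no_set_skip_hint suppresses the write side in update_pageblock_skip(). A minimal userspace sketch of that split, using a stand-in struct rather than the real compact_control:

/*
 * Illustration only: a userspace model of the hint split above. The struct is
 * a stand-in for the kernel's compact_control; only the two flags and the
 * read/write sides they gate are modelled.
 */
#include <stdbool.h>
#include <stdio.h>

struct cc_model {
	bool ignore_skip_hint;	/* read side: scan blocks even if marked skip */
	bool no_set_skip_hint;	/* write side: never mark blocks for skipping */
};

/* Mirrors isolation_suitable(): may this pageblock be scanned? */
static bool may_scan(const struct cc_model *cc, bool block_marked_skip)
{
	return cc->ignore_skip_hint || !block_marked_skip;
}

/* Mirrors update_pageblock_skip(): may the skip bit be set on failure? */
static bool may_set_skip(const struct cc_model *cc)
{
	return !cc->no_set_skip_hint;
}

int main(void)
{
	/* alloc_contig_range() after this series: scan everything, mark nothing. */
	struct cc_model contig = { .ignore_skip_hint = true, .no_set_skip_hint = true };
	/* kcompactd after this series: honour skip bits and keep updating them. */
	struct cc_model kcompactd = { 0 };

	printf("contig:    scan marked=%d set skip=%d\n",
	       may_scan(&contig, true), may_set_skip(&contig));
	printf("kcompactd: scan marked=%d set skip=%d\n",
	       may_scan(&kcompactd, true), may_set_skip(&kcompactd));
	return 0;
}
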
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
new file mode 100644
index 000000000000..5c8e2abeaa15
--- /dev/null
+++ b/mm/gup_benchmark.c
@@ -0,0 +1,100 @@
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/ktime.h>
+#include <linux/debugfs.h>
+
+#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
+
+struct gup_benchmark {
+ __u64 delta_usec;
+ __u64 addr;
+ __u64 size;
+ __u32 nr_pages_per_call;
+ __u32 flags;
+};
+
+static int __gup_benchmark_ioctl(unsigned int cmd,
+ struct gup_benchmark *gup)
+{
+ ktime_t start_time, end_time;
+ unsigned long i, nr, nr_pages, addr, next;
+ struct page **pages;
+
+ nr_pages = gup->size / PAGE_SIZE;
+ pages = kvmalloc(sizeof(void *) * nr_pages, GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ i = 0;
+ nr = gup->nr_pages_per_call;
+ start_time = ktime_get();
+ for (addr = gup->addr; addr < gup->addr + gup->size; addr = next) {
+ if (nr != gup->nr_pages_per_call)
+ break;
+
+ next = addr + nr * PAGE_SIZE;
+ if (next > gup->addr + gup->size) {
+ next = gup->addr + gup->size;
+ nr = (next - addr) / PAGE_SIZE;
+ }
+
+ nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i);
+ i += nr;
+ }
+ end_time = ktime_get();
+
+ gup->delta_usec = ktime_us_delta(end_time, start_time);
+ gup->size = addr - gup->addr;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (!pages[i])
+ break;
+ put_page(pages[i]);
+ }
+
+ kvfree(pages);
+ return 0;
+}
+
+static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
+ unsigned long arg)
+{
+ struct gup_benchmark gup;
+ int ret;
+
+ if (cmd != GUP_FAST_BENCHMARK)
+ return -EINVAL;
+
+ if (copy_from_user(&gup, (void __user *)arg, sizeof(gup)))
+ return -EFAULT;
+
+ ret = __gup_benchmark_ioctl(cmd, &gup);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &gup, sizeof(gup)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static const struct file_operations gup_benchmark_fops = {
+ .open = nonseekable_open,
+ .unlocked_ioctl = gup_benchmark_ioctl,
+};
+
+static int gup_benchmark_init(void)
+{
+ void *ret;
+
+ ret = debugfs_create_file_unsafe("gup_benchmark", 0600, NULL, NULL,
+ &gup_benchmark_fops);
+ if (!ret)
+ pr_warn("Failed to create gup_benchmark in debugfs");
+
+ return 0;
+}
+
+late_initcall(gup_benchmark_init);
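
For reference, a userspace caller of the ioctl above would look roughly like the selftest mentioned in the Kconfig help (tools/testing/selftests/vm/gup_benchmark.c). This is a sketch, not a copy of that selftest: the struct layout and ioctl number are taken from mm/gup_benchmark.c above, while the mapping size and nr_pages_per_call are arbitrary illustrative values.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/types.h>

struct gup_benchmark {
	__u64 delta_usec;
	__u64 addr;
	__u64 size;
	__u32 nr_pages_per_call;
	__u32 flags;
};

#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)

int main(void)
{
	struct gup_benchmark gup = {
		.size = 128UL << 20,	/* 128 MB of anonymous memory (arbitrary) */
		.nr_pages_per_call = 128,
		.flags = 1,		/* bit 0 is passed as the write argument above */
	};
	void *p;
	int fd;

	fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	p = mmap(NULL, gup.size, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	gup.addr = (unsigned long)p;

	if (ioctl(fd, GUP_FAST_BENCHMARK, &gup)) {
		perror("ioctl");
		return 1;
	}

	printf("get_user_pages_fast() over %llu bytes took %llu us\n",
	       (unsigned long long)gup.size,
	       (unsigned long long)gup.delta_usec);
	return 0;
}
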
diff --git a/mm/internal.h b/mm/internal.h
index 1df011f62480..e6bd35182dae 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -198,6 +198,7 @@ struct compact_control {
const int classzone_idx; /* zone index of a direct compactor */
enum migrate_mode mode; /* Async or sync migration mode */
bool ignore_skip_hint; /* Scan blocks even if marked skip */
+ bool no_set_skip_hint; /* Don't mark blocks for skipping */
bool ignore_block_suitable; /* Scan blocks considered unsuitable */
bool direct_compaction; /* False from kcompactd or /proc/... */
bool whole_zone; /* Whole zone should/has been scanned */
diff --git a/mm/mmap.c b/mm/mmap.c
index 680506faceae..924839fac0e6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1387,9 +1387,24 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (file) {
struct inode *inode = file_inode(file);
+ unsigned long flags_mask;
+
+ flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
switch (flags & MAP_TYPE) {
case MAP_SHARED:
+ /*
+ * Force use of MAP_SHARED_VALIDATE with non-legacy
+ * flags. E.g. MAP_SYNC is dangerous to use with
+ * MAP_SHARED as you don't know which consistency model
+ * you will get. We silently ignore unsupported flags
+ * with MAP_SHARED to preserve backward compatibility.
+ */
+ flags &= LEGACY_MAP_MASK;
+ /* fall through */
+ case MAP_SHARED_VALIDATE:
+ if (flags & ~flags_mask)
+ return -EOPNOTSUPP;
if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
return -EACCES;
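
The comment in the hunk above spells out the contract: plain MAP_SHARED silently drops flags outside the legacy mask, while MAP_SHARED_VALIDATE rejects anything the file's ->mmap_supported_flags does not advertise, failing with -EOPNOTSUPP. A sketch of how a caller could use that to probe for MAP_SYNC support; the fallback macro values are assumptions taken from the uapi headers of the same series, so verify them against your local linux/mman.h:

#include <errno.h>
#include <stddef.h>
#include <sys/mman.h>

#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE	0x03	/* assumed uapi value from this series */
#endif
#ifndef MAP_SYNC
#define MAP_SYNC		0x80000	/* assumed asm-generic value; verify locally */
#endif

/*
 * Probe for MAP_SYNC the way the comment above intends: request it through
 * MAP_SHARED_VALIDATE so an unsupported flag fails with EOPNOTSUPP instead of
 * being silently ignored, then fall back to a plain shared mapping.
 */
static void *map_maybe_sync(int fd, size_t len)
{
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p != MAP_FAILED || errno != EOPNOTSUPP)
		return p;

	/* The file (or kernel) does not support MAP_SYNC; take the legacy path. */
	return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}
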
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 55ded92f9809..d4096f4a5c1f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7619,6 +7619,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
.zone = page_zone(pfn_to_page(start)),
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
+ .no_set_skip_hint = true,
.gfp_mask = current_gfp_context(gfp_mask),
};
INIT_LIST_HEAD(&cc.migratepages);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1f97d77551c3..4aa9307feab0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3202,7 +3202,6 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
int len;
struct inode *inode;
struct page *page;
- struct shmem_inode_info *info;
len = strlen(symname) + 1;
if (len > PAGE_SIZE)
@@ -3222,7 +3221,6 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
error = 0;
}
- info = SHMEM_I(inode);
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
inode->i_link = kmemdup(symname, len, GFP_KERNEL);
diff --git a/mm/z3fold.c b/mm/z3fold.c
index b2ba2ba585f3..39e19125d6a0 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -404,8 +404,7 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
WARN_ON(z3fold_page_trylock(zhdr));
else
z3fold_page_lock(zhdr);
- if (test_bit(PAGE_STALE, &page->private) ||
- !test_and_clear_bit(NEEDS_COMPACTING, &page->private)) {
+ if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
z3fold_page_unlock(zhdr);
return;
}
@@ -413,6 +412,11 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
list_del_init(&zhdr->buddy);
spin_unlock(&pool->lock);
+ if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
+ atomic64_dec(&pool->pages_nr);
+ return;
+ }
+
z3fold_compact_page(zhdr);
unbuddied = get_cpu_ptr(pool->unbuddied);
fchunks = num_free_chunks(zhdr);
@@ -753,9 +757,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
list_del_init(&zhdr->buddy);
spin_unlock(&pool->lock);
zhdr->cpu = -1;
+ kref_get(&zhdr->refcount);
do_compact_page(zhdr, true);
return;
}
+ kref_get(&zhdr->refcount);
queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
z3fold_page_unlock(zhdr);
}