Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig            9
-rw-r--r--   mm/Makefile           1
-rw-r--r--   mm/compaction.c      48
-rw-r--r--   mm/gup_benchmark.c  100
-rw-r--r--   mm/internal.h         1
-rw-r--r--   mm/mmap.c            15
-rw-r--r--   mm/page_alloc.c       1
-rw-r--r--   mm/shmem.c            2
-rw-r--r--   mm/z3fold.c          10
9 files changed, 170 insertions, 17 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 9c4bdddd80c2..03ff7703d322 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -756,3 +756,12 @@ config PERCPU_STATS
This feature collects and exposes statistics via debugfs. The
information includes global and per chunk statistics, which can
be used to help understand percpu memory usage.
+
+config GUP_BENCHMARK
+ bool "Enable infrastructure for get_user_pages_fast() benchmarking"
+ default n
+ help
+ Provides /sys/kernel/debug/gup_benchmark that helps with testing
+ performance of get_user_pages_fast().
+
+ See tools/testing/selftests/vm/gup_benchmark.c
diff --git a/mm/Makefile b/mm/Makefile
index e7ebd176fb93..e669f02c5a54 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -80,6 +80,7 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
+obj-$(CONFIG_GUP_BENCHMARK) += gup_benchmark.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 85395dc6eb13..10cd757f1006 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -219,6 +219,24 @@ static void reset_cached_positions(struct zone *zone)
}
/*
+ * Compound pages of >= pageblock_order should consistently be skipped until
+ * released. It is always pointless to compact pages of such order (if they are
+ * migratable), and the pageblocks they occupy cannot contain any free pages.
+ */
+static bool pageblock_skip_persistent(struct page *page)
+{
+ if (!PageCompound(page))
+ return false;
+
+ page = compound_head(page);
+
+ if (compound_order(page) >= pageblock_order)
+ return true;
+
+ return false;
+}
+
+/*
* This function is called to clear all cached information on pageblocks that
* should be skipped for page isolation when the migrate and free page scanner
* meet.
@@ -242,6 +260,8 @@ static void __reset_isolation_suitable(struct zone *zone)
continue;
if (zone != page_zone(page))
continue;
+ if (pageblock_skip_persistent(page))
+ continue;
clear_pageblock_skip(page);
}
@@ -275,7 +295,7 @@ static void update_pageblock_skip(struct compact_control *cc,
struct zone *zone = cc->zone;
unsigned long pfn;
- if (cc->ignore_skip_hint)
+ if (cc->no_set_skip_hint)
return;
if (!page)
@@ -307,7 +327,12 @@ static inline bool isolation_suitable(struct compact_control *cc,
return true;
}
-static void update_pageblock_skip(struct compact_control *cc,
+static inline bool pageblock_skip_persistent(struct page *page)
+{
+ return false;
+}
+
+static inline void update_pageblock_skip(struct compact_control *cc,
struct page *page, unsigned long nr_isolated,
bool migrate_scanner)
{
@@ -449,13 +474,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
* and the only danger is skipping too much.
*/
if (PageCompound(page)) {
- unsigned int comp_order = compound_order(page);
+ const unsigned int order = compound_order(page);
- if (likely(comp_order < MAX_ORDER)) {
- blockpfn += (1UL << comp_order) - 1;
- cursor += (1UL << comp_order) - 1;
+ if (likely(order < MAX_ORDER)) {
+ blockpfn += (1UL << order) - 1;
+ cursor += (1UL << order) - 1;
}
-
goto isolate_fail;
}
@@ -772,11 +796,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* danger is skipping too much.
*/
if (PageCompound(page)) {
- unsigned int comp_order = compound_order(page);
-
- if (likely(comp_order < MAX_ORDER))
- low_pfn += (1UL << comp_order) - 1;
+ const unsigned int order = compound_order(page);
+ if (likely(order < MAX_ORDER))
+ low_pfn += (1UL << order) - 1;
goto isolate_fail;
}
@@ -1928,9 +1951,8 @@ static void kcompactd_do_work(pg_data_t *pgdat)
.total_free_scanned = 0,
.classzone_idx = pgdat->kcompactd_classzone_idx,
.mode = MIGRATE_SYNC_LIGHT,
- .ignore_skip_hint = true,
+ .ignore_skip_hint = false,
.gfp_mask = GFP_KERNEL,
-
};
trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
cc.classzone_idx);
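
The net effect of the compaction hunks above, together with the new no_set_skip_hint field added to struct compact_control further down, is that the old ignore_skip_hint now only controls the read side (scan pageblocks even if they are marked skip), while no_set_skip_hint suppresses the write side in update_pageblock_skip(). A minimal userspace sketch of that split, using a stand-in struct rather than the real compact_control:

/*
 * Illustration only: a userspace model of the hint split above. The struct is
 * a stand-in for the kernel's compact_control; only the two flags and the
 * read/write sides they gate are modelled.
 */
#include <stdbool.h>
#include <stdio.h>

struct cc_model {
	bool ignore_skip_hint;	/* read side: scan blocks even if marked skip */
	bool no_set_skip_hint;	/* write side: never mark blocks for skipping */
};

/* Mirrors isolation_suitable(): may this pageblock be scanned? */
static bool may_scan(const struct cc_model *cc, bool block_marked_skip)
{
	return cc->ignore_skip_hint || !block_marked_skip;
}

/* Mirrors update_pageblock_skip(): may the skip bit be set on failure? */
static bool may_set_skip(const struct cc_model *cc)
{
	return !cc->no_set_skip_hint;
}

int main(void)
{
	/* alloc_contig_range() after this series: scan everything, mark nothing. */
	struct cc_model contig = { .ignore_skip_hint = true, .no_set_skip_hint = true };
	/* kcompactd after this series: honour skip bits and keep updating them. */
	struct cc_model kcompactd = { 0 };

	printf("contig:    scan marked=%d set skip=%d\n",
	       may_scan(&contig, true), may_set_skip(&contig));
	printf("kcompactd: scan marked=%d set skip=%d\n",
	       may_scan(&kcompactd, true), may_set_skip(&kcompactd));
	return 0;
}
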
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
new file mode 100644
index 000000000000..5c8e2abeaa15
--- /dev/null
+++ b/mm/gup_benchmark.c
@@ -0,0 +1,100 @@
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/ktime.h>
+#include <linux/debugfs.h>
+
+#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
+
+struct gup_benchmark {
+ __u64 delta_usec;
+ __u64 addr;
+ __u64 size;
+ __u32 nr_pages_per_call;
+ __u32 flags;
+};
+
+static int __gup_benchmark_ioctl(unsigned int cmd,
+ struct gup_benchmark *gup)
+{
+ ktime_t start_time, end_time;
+ unsigned long i, nr, nr_pages, addr, next;
+ struct page **pages;
+
+ nr_pages = gup->size / PAGE_SIZE;
+ pages = kvmalloc(sizeof(void *) * nr_pages, GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ i = 0;
+ nr = gup->nr_pages_per_call;
+ start_time = ktime_get();
+ for (addr = gup->addr; addr < gup->addr + gup->size; addr = next) {
+ if (nr != gup->nr_pages_per_call)
+ break;
+
+ next = addr + nr * PAGE_SIZE;
+ if (next > gup->addr + gup->size) {
+ next = gup->addr + gup->size;
+ nr = (next - addr) / PAGE_SIZE;
+ }
+
+ nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i);
+ i += nr;
+ }
+ end_time = ktime_get();
+
+ gup->delta_usec = ktime_us_delta(end_time, start_time);
+ gup->size = addr - gup->addr;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (!pages[i])
+ break;
+ put_page(pages[i]);
+ }
+
+ kvfree(pages);
+ return 0;
+}
+
+static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
+ unsigned long arg)
+{
+ struct gup_benchmark gup;
+ int ret;
+
+ if (cmd != GUP_FAST_BENCHMARK)
+ return -EINVAL;
+
+ if (copy_from_user(&gup, (void __user *)arg, sizeof(gup)))
+ return -EFAULT;
+
+ ret = __gup_benchmark_ioctl(cmd, &gup);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &gup, sizeof(gup)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static const struct file_operations gup_benchmark_fops = {
+ .open = nonseekable_open,
+ .unlocked_ioctl = gup_benchmark_ioctl,
+};
+
+static int gup_benchmark_init(void)
+{
+ void *ret;
+
+ ret = debugfs_create_file_unsafe("gup_benchmark", 0600, NULL, NULL,
+ &gup_benchmark_fops);
+ if (!ret)
+ pr_warn("Failed to create gup_benchmark in debugfs");
+
+ return 0;
+}
+
+late_initcall(gup_benchmark_init);
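
For reference, a userspace caller of the ioctl above would look roughly like the selftest mentioned in the Kconfig help (tools/testing/selftests/vm/gup_benchmark.c). This is a sketch, not a copy of that selftest: the struct layout and ioctl number are taken from mm/gup_benchmark.c above, while the mapping size and nr_pages_per_call are arbitrary illustrative values.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/types.h>

struct gup_benchmark {
	__u64 delta_usec;
	__u64 addr;
	__u64 size;
	__u32 nr_pages_per_call;
	__u32 flags;
};

#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)

int main(void)
{
	struct gup_benchmark gup = {
		.size = 128UL << 20,	/* 128 MB of anonymous memory (arbitrary) */
		.nr_pages_per_call = 128,
		.flags = 1,		/* bit 0 is passed as the write argument above */
	};
	void *p;
	int fd;

	fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	p = mmap(NULL, gup.size, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	gup.addr = (unsigned long)p;

	if (ioctl(fd, GUP_FAST_BENCHMARK, &gup)) {
		perror("ioctl");
		return 1;
	}

	printf("get_user_pages_fast() over %llu bytes took %llu us\n",
	       (unsigned long long)gup.size,
	       (unsigned long long)gup.delta_usec);
	return 0;
}
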
diff --git a/mm/internal.h b/mm/internal.h
index 1df011f62480..e6bd35182dae 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -198,6 +198,7 @@ struct compact_control {
const int classzone_idx; /* zone index of a direct compactor */
enum migrate_mode mode; /* Async or sync migration mode */
bool ignore_skip_hint; /* Scan blocks even if marked skip */
+ bool no_set_skip_hint; /* Don't mark blocks for skipping */
bool ignore_block_suitable; /* Scan blocks considered unsuitable */
bool direct_compaction; /* False from kcompactd or /proc/... */
bool whole_zone; /* Whole zone should/has been scanned */
diff --git a/mm/mmap.c b/mm/mmap.c
index 680506faceae..924839fac0e6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1387,9 +1387,24 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (file) {
struct inode *inode = file_inode(file);
+ unsigned long flags_mask;
+
+ flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
switch (flags & MAP_TYPE) {
case MAP_SHARED:
+ /*
+ * Force use of MAP_SHARED_VALIDATE with non-legacy
+ * flags. E.g. MAP_SYNC is dangerous to use with
+ * MAP_SHARED as you don't know which consistency model
+ * you will get. We silently ignore unsupported flags
+ * with MAP_SHARED to preserve backward compatibility.
+ */
+ flags &= LEGACY_MAP_MASK;
+ /* fall through */
+ case MAP_SHARED_VALIDATE:
+ if (flags & ~flags_mask)
+ return -EOPNOTSUPP;
if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
return -EACCES;
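
The comment in the hunk above spells out the contract: plain MAP_SHARED silently drops flags outside the legacy mask, while MAP_SHARED_VALIDATE rejects anything the file's ->mmap_supported_flags does not advertise, failing with -EOPNOTSUPP. A sketch of how a caller could use that to probe for MAP_SYNC support; the fallback macro values are assumptions taken from the uapi headers of the same series, so verify them against your local linux/mman.h:

#include <errno.h>
#include <stddef.h>
#include <sys/mman.h>

#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE	0x03	/* assumed uapi value from this series */
#endif
#ifndef MAP_SYNC
#define MAP_SYNC		0x80000	/* assumed asm-generic value; verify locally */
#endif

/*
 * Probe for MAP_SYNC the way the comment above intends: request it through
 * MAP_SHARED_VALIDATE so an unsupported flag fails with EOPNOTSUPP instead of
 * being silently ignored, then fall back to a plain shared mapping.
 */
static void *map_maybe_sync(int fd, size_t len)
{
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p != MAP_FAILED || errno != EOPNOTSUPP)
		return p;

	/* The file (or kernel) does not support MAP_SYNC; take the legacy path. */
	return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}
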
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 55ded92f9809..d4096f4a5c1f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7619,6 +7619,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
.zone = page_zone(pfn_to_page(start)),
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
+ .no_set_skip_hint = true,
.gfp_mask = current_gfp_context(gfp_mask),
};
INIT_LIST_HEAD(&cc.migratepages);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1f97d77551c3..4aa9307feab0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3202,7 +3202,6 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
int len;
struct inode *inode;
struct page *page;
- struct shmem_inode_info *info;
len = strlen(symname) + 1;
if (len > PAGE_SIZE)
@@ -3222,7 +3221,6 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
error = 0;
}
- info = SHMEM_I(inode);
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
inode->i_link = kmemdup(symname, len, GFP_KERNEL);
diff --git a/mm/z3fold.c b/mm/z3fold.c
index b2ba2ba585f3..39e19125d6a0 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -404,8 +404,7 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
WARN_ON(z3fold_page_trylock(zhdr));
else
z3fold_page_lock(zhdr);
- if (test_bit(PAGE_STALE, &page->private) ||
- !test_and_clear_bit(NEEDS_COMPACTING, &page->private)) {
+ if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
z3fold_page_unlock(zhdr);
return;
}
@@ -413,6 +412,11 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked)
list_del_init(&zhdr->buddy);
spin_unlock(&pool->lock);
+ if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
+ atomic64_dec(&pool->pages_nr);
+ return;
+ }
+
z3fold_compact_page(zhdr);
unbuddied = get_cpu_ptr(pool->unbuddied);
fchunks = num_free_chunks(zhdr);
@@ -753,9 +757,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
list_del_init(&zhdr->buddy);
spin_unlock(&pool->lock);
zhdr->cpu = -1;
+ kref_get(&zhdr->refcount);
do_compact_page(zhdr, true);
return;
}
+ kref_get(&zhdr->refcount);
queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
z3fold_page_unlock(zhdr);
}