summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-09 10:30:07 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-09 10:30:07 -0700
commitfbf4432ff71b7a25bef993a5312906946d27f446 (patch)
treecf3e0024af4b8f9376eff75743f1fa1526e40900 /fs
parentc054be10ffdbd5507a1fd738067d76acfb4808fd (diff)
parent0cfb6aee70bddbef6ec796b255f588ce0e126766 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton: - most of the rest of MM - a small number of misc things - lib/ updates - checkpatch - autofs updates - ipc/ updates * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (126 commits) ipc: optimize semget/shmget/msgget for lots of keys ipc/sem: play nicer with large nsops allocations ipc/sem: drop sem_checkid helper ipc: convert kern_ipc_perm.refcount from atomic_t to refcount_t ipc: convert sem_undo_list.refcnt from atomic_t to refcount_t ipc: convert ipc_namespace.count from atomic_t to refcount_t kcov: support compat processes sh: defconfig: cleanup from old Kconfig options mn10300: defconfig: cleanup from old Kconfig options m32r: defconfig: cleanup from old Kconfig options drivers/pps: use surrounding "if PPS" to remove numerous dependency checks drivers/pps: aesthetic tweaks to PPS-related content cpumask: make cpumask_next() out-of-line kmod: move #ifdef CONFIG_MODULES wrapper to Makefile kmod: split off umh headers into its own file MAINTAINERS: clarify kmod is just a kernel module loader kmod: split out umh code into its own file test_kmod: flip INT checks to be consistent test_kmod: remove paranoid UINT_MAX check on uint range processing vfat: deduplicate hex2bin() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c8
-rw-r--r--fs/autofs4/autofs_i.h22
-rw-r--r--fs/autofs4/dev-ioctl.c44
-rw-r--r--fs/binfmt_flat.c8
-rw-r--r--fs/eventpoll.c30
-rw-r--r--fs/f2fs/data.c5
-rw-r--r--fs/fat/namei_vfat.c35
-rw-r--r--fs/hugetlbfs/inode.c11
-rw-r--r--fs/inode.c2
-rw-r--r--fs/namei.c15
-rw-r--r--fs/proc/generic.c34
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/proc_net.c2
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/task_mmu.c81
-rw-r--r--fs/proc/task_nommu.c5
-rw-r--r--fs/ubifs/file.c5
-rw-r--r--fs/userfaultfd.c20
18 files changed, 198 insertions, 133 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 8f0127526299..b5d69f28d8b1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -373,6 +373,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
pgoff_t idx;
int rc;
+ /*
+ * We cannot support the _NO_COPY case here, because copy needs to
+ * happen under the ctx->completion_lock. That does not work with the
+ * migration workflow of MIGRATE_SYNC_NO_COPY.
+ */
+ if (mode == MIGRATE_SYNC_NO_COPY)
+ return -EINVAL;
+
rc = 0;
/* mapping->private_lock here protects against the kioctx teardown. */
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index beef981aa54f..4737615f0eaa 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -11,10 +11,21 @@
#include <linux/auto_fs4.h>
#include <linux/auto_dev-ioctl.h>
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/completion.h>
+#include <asm/current.h>
/* This is the range of ioctl() numbers we claim as ours */
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
@@ -24,17 +35,6 @@
#define AUTOFS_DEV_IOCTL_IOC_COUNT \
(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD - AUTOFS_DEV_IOCTL_VERSION_CMD)
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <asm/current.h>
-#include <linux/uaccess.h>
-
#ifdef pr_fmt
#undef pr_fmt
#endif
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index dd9f1bebb5a3..b7c816f39404 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -93,17 +93,17 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
* at the end of the struct.
*/
static struct autofs_dev_ioctl *
- copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
+copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
{
struct autofs_dev_ioctl tmp, *res;
- if (copy_from_user(&tmp, in, sizeof(tmp)))
+ if (copy_from_user(&tmp, in, AUTOFS_DEV_IOCTL_SIZE))
return ERR_PTR(-EFAULT);
- if (tmp.size < sizeof(tmp))
+ if (tmp.size < AUTOFS_DEV_IOCTL_SIZE)
return ERR_PTR(-EINVAL);
- if (tmp.size > (PATH_MAX + sizeof(tmp)))
+ if (tmp.size > AUTOFS_DEV_IOCTL_SIZE + PATH_MAX)
return ERR_PTR(-ENAMETOOLONG);
res = memdup_user(in, tmp.size);
@@ -133,8 +133,8 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
goto out;
}
- if (param->size > sizeof(*param)) {
- err = invalid_str(param->path, param->size - sizeof(*param));
+ if (param->size > AUTOFS_DEV_IOCTL_SIZE) {
+ err = invalid_str(param->path, param->size - AUTOFS_DEV_IOCTL_SIZE);
if (err) {
pr_warn(
"path string terminator missing for cmd(0x%08x)\n",
@@ -258,11 +258,6 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
if (err)
goto out;
- /*
- * Find autofs super block that has the device number
- * corresponding to the autofs fs we want to open.
- */
-
filp = dentry_open(&path, O_RDONLY, current_cred());
path_put(&path);
if (IS_ERR(filp)) {
@@ -451,7 +446,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
dev_t devid;
int err = -ENOENT;
- if (param->size <= sizeof(*param)) {
+ if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
err = -EINVAL;
goto out;
}
@@ -539,7 +534,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
unsigned int devid, magic;
int err = -ENOENT;
- if (param->size <= sizeof(*param)) {
+ if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
err = -EINVAL;
goto out;
}
@@ -628,10 +623,6 @@ static int _autofs_dev_ioctl(unsigned int command,
ioctl_fn fn = NULL;
int err = 0;
- /* only root can play with this */
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST);
cmd = _IOC_NR(command);
@@ -640,6 +631,14 @@ static int _autofs_dev_ioctl(unsigned int command,
return -ENOTTY;
}
+ /* Only root can use ioctls other than AUTOFS_DEV_IOCTL_VERSION_CMD
+ * and AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD
+ */
+ if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD &&
+ cmd != AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD &&
+ !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
/* Copy the parameters into kernel space. */
param = copy_dev_ioctl(user);
if (IS_ERR(param))
@@ -706,7 +705,8 @@ out:
return err;
}
-static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
+static long autofs_dev_ioctl(struct file *file, unsigned int command,
+ unsigned long u)
{
int err;
@@ -715,9 +715,10 @@ static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
}
#ifdef CONFIG_COMPAT
-static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u)
+static long autofs_dev_ioctl_compat(struct file *file, unsigned int command,
+ unsigned long u)
{
- return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u));
+ return autofs_dev_ioctl(file, command, (unsigned long) compat_ptr(u));
}
#else
#define autofs_dev_ioctl_compat NULL
@@ -733,7 +734,8 @@ static const struct file_operations _dev_ioctl_fops = {
static struct miscdevice _autofs_dev_ioctl_misc = {
.minor = AUTOFS_MINOR,
.name = AUTOFS_DEVICE_NAME,
- .fops = &_dev_ioctl_fops
+ .fops = &_dev_ioctl_fops,
+ .mode = 0644,
};
MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 604a176df0c2..ce6537c50ec1 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -192,13 +192,11 @@ static int decompress_exec(
memset(&strm, 0, sizeof(strm));
strm.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
- if (strm.workspace == NULL) {
- pr_debug("no memory for decompress workspace\n");
+ if (!strm.workspace)
return -ENOMEM;
- }
+
buf = kmalloc(LBUFSIZE, GFP_KERNEL);
- if (buf == NULL) {
- pr_debug("no memory for read buffer\n");
+ if (!buf) {
retval = -ENOMEM;
goto out_free;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index adbe328b957c..2fabd19cdeea 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -205,7 +205,7 @@ struct eventpoll {
struct list_head rdllist;
/* RB tree root used to store monitored fd structs */
- struct rb_root rbr;
+ struct rb_root_cached rbr;
/*
* This is a single linked list that chains all the "struct epitem" that
@@ -796,7 +796,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
list_del_rcu(&epi->fllink);
spin_unlock(&file->f_lock);
- rb_erase(&epi->rbn, &ep->rbr);
+ rb_erase_cached(&epi->rbn, &ep->rbr);
spin_lock_irqsave(&ep->lock, flags);
if (ep_is_linked(&epi->rdllink))
@@ -840,7 +840,7 @@ static void ep_free(struct eventpoll *ep)
/*
* Walks through the whole tree by unregistering poll callbacks.
*/
- for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
ep_unregister_pollwait(ep, epi);
@@ -856,7 +856,7 @@ static void ep_free(struct eventpoll *ep)
* a lockdep warning.
*/
mutex_lock(&ep->mtx);
- while ((rbp = rb_first(&ep->rbr)) != NULL) {
+ while ((rbp = rb_first_cached(&ep->rbr)) != NULL) {
epi = rb_entry(rbp, struct epitem, rbn);
ep_remove(ep, epi);
cond_resched();
@@ -963,7 +963,7 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
struct rb_node *rbp;
mutex_lock(&ep->mtx);
- for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
struct inode *inode = file_inode(epi->ffd.file);
@@ -1040,7 +1040,7 @@ static int ep_alloc(struct eventpoll **pep)
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
- ep->rbr = RB_ROOT;
+ ep->rbr = RB_ROOT_CACHED;
ep->ovflist = EP_UNACTIVE_PTR;
ep->user = user;
@@ -1066,7 +1066,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
struct epoll_filefd ffd;
ep_set_ffd(&ffd, file, fd);
- for (rbp = ep->rbr.rb_node; rbp; ) {
+ for (rbp = ep->rbr.rb_root.rb_node; rbp; ) {
epi = rb_entry(rbp, struct epitem, rbn);
kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
if (kcmp > 0)
@@ -1088,7 +1088,7 @@ static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long t
struct rb_node *rbp;
struct epitem *epi;
- for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (epi->ffd.fd == tfd) {
if (toff == 0)
@@ -1273,20 +1273,22 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
{
int kcmp;
- struct rb_node **p = &ep->rbr.rb_node, *parent = NULL;
+ struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL;
struct epitem *epic;
+ bool leftmost = true;
while (*p) {
parent = *p;
epic = rb_entry(parent, struct epitem, rbn);
kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
- if (kcmp > 0)
+ if (kcmp > 0) {
p = &parent->rb_right;
- else
+ leftmost = false;
+ } else
p = &parent->rb_left;
}
rb_link_node(&epi->rbn, parent, p);
- rb_insert_color(&epi->rbn, &ep->rbr);
+ rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost);
}
@@ -1530,7 +1532,7 @@ error_remove_epi:
list_del_rcu(&epi->fllink);
spin_unlock(&tfile->f_lock);
- rb_erase(&epi->rbn, &ep->rbr);
+ rb_erase_cached(&epi->rbn, &ep->rbr);
error_unregister:
ep_unregister_pollwait(ep, epi);
@@ -1878,7 +1880,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
mutex_lock_nested(&ep->mtx, call_nests + 1);
ep->visited = 1;
list_add(&ep->visited_list_link, &visited_list);
- for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a791aac4c5af..fb96bb71da00 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2253,7 +2253,10 @@ int f2fs_migrate_page(struct address_space *mapping,
SetPagePrivate(newpage);
set_page_private(newpage, page_private(page));
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6a7152d0c250..02c066663a3a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -19,6 +19,8 @@
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/namei.h>
+#include <linux/kernel.h>
+
#include "fat.h"
static inline unsigned long vfat_d_version(struct dentry *dentry)
@@ -510,10 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
struct nls_table *nls)
{
const unsigned char *ip;
- unsigned char nc;
unsigned char *op;
- unsigned int ec;
- int i, k, fill;
+ int i, fill;
int charlen;
if (utf8) {
@@ -530,33 +530,22 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
i < len && *outlen < FAT_LFN_LEN;
*outlen += 1) {
if (escape && (*ip == ':')) {
+ u8 uc[2];
+
if (i > len - 5)
return -EINVAL;
- ec = 0;
- for (k = 1; k < 5; k++) {
- nc = ip[k];
- ec <<= 4;
- if (nc >= '0' && nc <= '9') {
- ec |= nc - '0';
- continue;
- }
- if (nc >= 'a' && nc <= 'f') {
- ec |= nc - ('a' - 10);
- continue;
- }
- if (nc >= 'A' && nc <= 'F') {
- ec |= nc - ('A' - 10);
- continue;
- }
+
+ if (hex2bin(uc, ip + 1, 2) < 0)
return -EINVAL;
- }
- *op++ = ec & 0xFF;
- *op++ = ec >> 8;
+
+ *(wchar_t *)op = uc[0] << 8 | uc[1];
+
+ op += 2;
ip += 5;
i += 5;
} else {
charlen = nls->char2uni(ip, len - i,
- (wchar_t *)op);
+ (wchar_t *)op);
if (charlen < 0)
return -EINVAL;
ip += charlen;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7c02b3f738e1..59073e9f01a4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -334,7 +334,7 @@ static void remove_huge_page(struct page *page)
}
static void
-hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
+hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
{
struct vm_area_struct *vma;
@@ -498,7 +498,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
i_size_write(inode, offset);
i_mmap_lock_write(mapping);
- if (!RB_EMPTY_ROOT(&mapping->i_mmap))
+ if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, offset, LLONG_MAX);
@@ -523,7 +523,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
inode_lock(inode);
i_mmap_lock_write(mapping);
- if (!RB_EMPTY_ROOT(&mapping->i_mmap))
+ if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap,
hole_start >> PAGE_SHIFT,
hole_end >> PAGE_SHIFT);
@@ -830,7 +830,10 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
rc = migrate_huge_page_move_mapping(mapping, newpage, page);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
diff --git a/fs/inode.c b/fs/inode.c
index 6a1626e0edaf..210054157a49 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -353,7 +353,7 @@ void address_space_init_once(struct address_space *mapping)
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
- mapping->i_mmap = RB_ROOT;
+ mapping->i_mmap = RB_ROOT_CACHED;
}
EXPORT_SYMBOL(address_space_init_once);
diff --git a/fs/namei.c b/fs/namei.c
index ddb6a7c2b3d4..1180f9c58093 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1129,9 +1129,18 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* of the daemon to instantiate them before they can be used.
*/
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
- LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
- path->dentry->d_inode)
- return -EISDIR;
+ LOOKUP_OPEN | LOOKUP_CREATE |
+ LOOKUP_AUTOMOUNT))) {
+ /* Positive dentry that isn't meant to trigger an
+ * automount, EISDIR will allow it to be used,
+ * otherwise there's no mount here "now" so return
+ * ENOENT.
+ */
+ if (path->dentry->d_inode)
+ return -EISDIR;
+ else
+ return -ENOENT;
+ }
if (path->dentry->d_sb->s_user_ns != &init_user_ns)
return -EACCES;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index e3cda0b5968f..793a67574668 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -40,8 +40,8 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry
static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
{
- return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
- subdir_node);
+ return rb_entry_safe(rb_first_cached(&dir->subdir),
+ struct proc_dir_entry, subdir_node);
}
static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
@@ -54,7 +54,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
const char *name,
unsigned int len)
{
- struct rb_node *node = dir->subdir.rb_node;
+ struct rb_node *node = dir->subdir.rb_root.rb_node;
while (node) {
struct proc_dir_entry *de = rb_entry(node,
@@ -75,8 +75,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
static bool pde_subdir_insert(struct proc_dir_entry *dir,
struct proc_dir_entry *de)
{
- struct rb_root *root = &dir->subdir;
- struct rb_node **new = &root->rb_node, *parent = NULL;
+ struct rb_root_cached *root = &dir->subdir;
+ struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
+ bool leftmost = true;
/* Figure out where to put new node */
while (*new) {
@@ -88,15 +89,16 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
parent = *new;
if (result < 0)
new = &(*new)->rb_left;
- else if (result > 0)
+ else if (result > 0) {
new = &(*new)->rb_right;
- else
+ leftmost = false;
+ } else
return false;
}
/* Add new node and rebalance tree. */
rb_link_node(&de->subdir_node, parent, new);
- rb_insert_color(&de->subdir_node, root);
+ rb_insert_color_cached(&de->subdir_node, root, leftmost);
return true;
}
@@ -369,7 +371,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
ent->namelen = qstr.len;
ent->mode = mode;
ent->nlink = nlink;
- ent->subdir = RB_ROOT;
+ ent->subdir = RB_ROOT_CACHED;
atomic_set(&ent->count, 1);
spin_lock_init(&ent->pde_unload_lock);
INIT_LIST_HEAD(&ent->pde_openers);
@@ -499,6 +501,14 @@ out:
}
EXPORT_SYMBOL(proc_create_data);
+struct proc_dir_entry *proc_create(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ const struct file_operations *proc_fops)
+{
+ return proc_create_data(name, mode, parent, proc_fops, NULL);
+}
+EXPORT_SYMBOL(proc_create);
+
void proc_set_size(struct proc_dir_entry *de, loff_t size)
{
de->size = size;
@@ -545,7 +555,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
de = pde_subdir_find(parent, fn, len);
if (de)
- rb_erase(&de->subdir_node, &parent->subdir);
+ rb_erase_cached(&de->subdir_node, &parent->subdir);
write_unlock(&proc_subdir_lock);
if (!de) {
WARN(1, "name '%s'\n", name);
@@ -582,13 +592,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
write_unlock(&proc_subdir_lock);
return -ENOENT;
}
- rb_erase(&root->subdir_node, &parent->subdir);
+ rb_erase_cached(&root->subdir_node, &parent->subdir);
de = root;
while (1) {
next = pde_subdir_first(de);
if (next) {
- rb_erase(&next->subdir_node, &de->subdir);
+ rb_erase_cached(&next->subdir_node, &de->subdir);
de = next;
continue;
}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 2cbfcd32e884..a34195e92b20 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -40,7 +40,7 @@ struct proc_dir_entry {
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
struct proc_dir_entry *parent;
- struct rb_root subdir;
+ struct rb_root_cached subdir;
struct rb_node subdir_node;
void *data;
atomic_t count; /* use count */
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index d72fc40241d9..a2bf369c923d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -196,7 +196,7 @@ static __net_init int proc_net_ns_init(struct net *net)
if (!netd)
goto out;
- netd->subdir = RB_ROOT;
+ netd->subdir = RB_ROOT_CACHED;
netd->data = net;
netd->nlink = 2;
netd->namelen = 3;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index deecb397daa3..926fb27f4ca2 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -210,7 +210,7 @@ struct proc_dir_entry proc_root = {
.proc_iops = &proc_root_inode_operations,
.proc_fops = &proc_root_operations,
.parent = &proc_root,
- .subdir = RB_ROOT,
+ .subdir = RB_ROOT_CACHED,
.name = "/proc",
};
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a290966f91ec..7b40e11ede9b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -268,8 +268,7 @@ static int do_maps_open(struct inode *inode, struct file *file,
* Indicate if the VMA is a stack for the given task; for
* /proc/PID/maps that is the stack of the main task.
*/
-static int is_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma)
+static int is_stack(struct vm_area_struct *vma)
{
/*
* We make no effort to guess what a given thread considers to be
@@ -302,7 +301,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
{
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
- struct proc_maps_private *priv = m->private;
vm_flags_t flags = vma->vm_flags;
unsigned long ino = 0;
unsigned long long pgoff = 0;
@@ -350,7 +348,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
goto done;
}
- if (is_stack(priv, vma))
+ if (is_stack(vma))
name = "[stack]";
}
@@ -549,6 +547,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
}
} else if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
+ else if (is_device_private_entry(swpent))
+ page = device_private_entry_to_page(swpent);
} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
&& pte_none(*pte))) {
page = find_get_entry(vma->vm_file->f_mapping,
@@ -608,13 +608,14 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
- smaps_pmd_entry(pmd, addr, walk);
+ if (pmd_present(*pmd))
+ smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
- return 0;
+ goto out;
}
if (pmd_trans_unstable(pmd))
- return 0;
+ goto out;
/*
* The mmap_sem held all the way back in m_start() is what
* keeps khugepaged out of here and from collapsing things
@@ -624,6 +625,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
for (; addr != end; pte++, addr += PAGE_SIZE)
smaps_pte_entry(pte, addr, walk);
pte_unmap_unlock(pte - 1, ptl);
+out:
cond_resched();
return 0;
}
@@ -712,6 +714,8 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
+ else if (is_device_private_entry(swpent))
+ page = device_private_entry_to_page(swpent);
}
if (page) {
int mapcount = page_mapcount(page);
@@ -977,17 +981,22 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
{
pmd_t pmd = *pmdp;
- /* See comment in change_huge_pmd() */
- pmdp_invalidate(vma, addr, pmdp);
- if (pmd_dirty(*pmdp))
- pmd = pmd_mkdirty(pmd);
- if (pmd_young(*pmdp))
- pmd = pmd_mkyoung(pmd);
-
- pmd = pmd_wrprotect(pmd);
- pmd = pmd_clear_soft_dirty(pmd);
-
- set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+ if (pmd_present(pmd)) {
+ /* See comment in change_huge_pmd() */
+ pmdp_invalidate(vma, addr, pmdp);
+ if (pmd_dirty(*pmdp))
+ pmd = pmd_mkdirty(pmd);
+ if (pmd_young(*pmdp))
+ pmd = pmd_mkyoung(pmd);
+
+ pmd = pmd_wrprotect(pmd);
+ pmd = pmd_clear_soft_dirty(pmd);
+
+ set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+ } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ pmd = pmd_swp_clear_soft_dirty(pmd);
+ set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+ }
}
#else
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
@@ -1012,6 +1021,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
goto out;
}
+ if (!pmd_present(*pmd))
+ goto out;
+
page = pmd_page(*pmd);
/* Clear accessed and referenced bits. */
@@ -1254,7 +1266,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pm->show_pfn)
frame = pte_pfn(pte);
flags |= PM_PRESENT;
- page = vm_normal_page(vma, addr, pte);
+ page = _vm_normal_page(vma, addr, pte, true);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
@@ -1267,6 +1279,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_SWAP;
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
+
+ if (is_device_private_entry(entry))
+ page = device_private_entry_to_page(entry);
}
if (page && !PageAnon(page))
@@ -1293,27 +1308,33 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+ struct page *page = NULL;
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
- /*
- * Currently pmd for thp is always present because thp
- * can not be swapped-out, migrated, or HWPOISONed
- * (split in such cases instead.)
- * This if-check is just to prepare for future implementation.
- */
if (pmd_present(pmd)) {
- struct page *page = pmd_page(pmd);
-
- if (page_mapcount(page) == 1)
- flags |= PM_MMAP_EXCLUSIVE;
+ page = pmd_page(pmd);
flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pmd_pfn(pmd) +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
}
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+ else if (is_swap_pmd(pmd)) {
+ swp_entry_t entry = pmd_to_swp_entry(pmd);
+
+ frame = swp_type(entry) |
+ (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ flags |= PM_SWAP;
+ VM_BUG_ON(!is_pmd_migration_entry(pmd));
+ page = migration_entry_to_page(entry);
+ }
+#endif
+
+ if (page && page_mapcount(page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
for (; addr != end; addr += PAGE_SIZE) {
pagemap_entry_t pme = make_pme(frame, flags);
@@ -1746,7 +1767,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
seq_file_path(m, file, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_puts(m, " heap");
- } else if (is_stack(proc_priv, vma)) {
+ } else if (is_stack(vma)) {
seq_puts(m, " stack");
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 23266694db11..dea90b566a6e 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -125,8 +125,7 @@ unsigned long task_statm(struct mm_struct *mm,
return size;
}
-static int is_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma)
+static int is_stack(struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
@@ -178,7 +177,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "");
- } else if (mm && is_stack(priv, vma)) {
+ } else if (mm && is_stack(vma)) {
seq_pad(m, ' ');
seq_printf(m, "[stack]");
}
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index f90a466ea5db..a02aa59d1e24 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1490,7 +1490,10 @@ static int ubifs_migrate_page(struct address_space *mapping,
SetPagePrivate(newpage);
}
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
#endif
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5419e7da82ba..ef4b48d1ea42 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -381,8 +381,26 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
* in __get_user_pages if userfaultfd_release waits on the
* caller of handle_userfault to release the mmap_sem.
*/
- if (unlikely(ACCESS_ONCE(ctx->released)))
+ if (unlikely(ACCESS_ONCE(ctx->released))) {
+ /*
+ * Don't return VM_FAULT_SIGBUS in this case, so a non
+ * cooperative manager can close the uffd after the
+ * last UFFDIO_COPY, without risking to trigger an
+ * involuntary SIGBUS if the process was starting the
+ * userfaultfd while the userfaultfd was still armed
+ * (but after the last UFFDIO_COPY). If the uffd
+ * wasn't already closed when the userfault reached
+ * this point, that would normally be solved by
+ * userfaultfd_must_wait returning 'false'.
+ *
+ * If we were to return VM_FAULT_SIGBUS here, the non
+ * cooperative manager would be instead forced to
+ * always call UFFDIO_UNREGISTER before it can safely
+ * close the uffd.
+ */
+ ret = VM_FAULT_NOPAGE;
goto out;
+ }
/*
* Check that we can return VM_FAULT_RETRY.