Diffstat (limited to 'fs/notify/inotify/inotify_user.c')
| -rw-r--r-- | fs/notify/inotify/inotify_user.c | 254 |
1 file changed, 159 insertions, 95 deletions
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 7cc7d3fb1862..b372fb2c56bd 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * fs/inotify_user.c - inotify support for userspace
  *
@@ -10,16 +11,6 @@
  *
  * Copyright (C) 2009 Eric Paris <Red Hat Inc>
  * inotify was largely rewriten to make use of the fsnotify infrastructure
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 
 #include <linux/file.h>
@@ -38,39 +29,53 @@
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
+#include <linux/memcontrol.h>
+#include <linux/security.h>
 
 #include "inotify.h"
 #include "../fdinfo.h"
 
 #include <asm/ioctls.h>
 
+/*
+ * An inotify watch requires allocating an inotify_inode_mark structure as
+ * well as pinning the watched inode. Doubling the size of a VFS inode
+ * should be more than enough to cover the additional filesystem inode
+ * size increase.
+ */
+#define INOTIFY_WATCH_COST	(sizeof(struct inotify_inode_mark) + \
+				 2 * sizeof(struct inode))
+
 /* configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_queued_events __read_mostly;
 
-struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
+struct kmem_cache *inotify_inode_mark_cachep __ro_after_init;
 
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
 
-static int zero;
+static long it_zero = 0;
+static long it_int_max = INT_MAX;
 
-struct ctl_table inotify_table[] = {
+static const struct ctl_table inotify_table[] = {
 	{
 		.procname	= "max_user_instances",
 		.data		= &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.proc_handler	= proc_doulongvec_minmax,
+		.extra1		= &it_zero,
+		.extra2		= &it_int_max,
 	},
 	{
 		.procname	= "max_user_watches",
 		.data		= &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.proc_handler	= proc_doulongvec_minmax,
+		.extra1		= &it_zero,
+		.extra2		= &it_int_max,
 	},
 	{
 		.procname	= "max_queued_events",
@@ -78,44 +83,68 @@ struct ctl_table inotify_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero
+		.extra1		= SYSCTL_ZERO
 	},
-	{ }
 };
+
+static void __init inotify_sysctls_init(void)
+{
+	register_sysctl("fs/inotify", inotify_table);
+}
+
+#else
+#define inotify_sysctls_init() do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
-static inline __u32 inotify_arg_to_mask(u32 arg)
+static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
 {
 	__u32 mask;
 
 	/*
-	 * everything should accept their own ignored, cares about children,
-	 * and should receive events when the inode is unmounted
+	 * Everything should receive events when the inode is unmounted.
+	 * All directories care about children.
 	 */
-	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
+	mask = (FS_UNMOUNT);
+	if (S_ISDIR(inode->i_mode))
+		mask |= FS_EVENT_ON_CHILD;
 
 	/* mask off the flags used to open the fd */
-	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
+	mask |= (arg & INOTIFY_USER_MASK);
 
 	return mask;
 }
 
+#define INOTIFY_MARK_FLAGS \
+	(FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT)
+
+static inline unsigned int inotify_arg_to_flags(u32 arg)
+{
+	unsigned int flags = 0;
+
+	if (arg & IN_EXCL_UNLINK)
+		flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK;
+	if (arg & IN_ONESHOT)
+		flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT;
+
+	return flags;
+}
+
 static inline u32 inotify_mask_to_arg(__u32 mask)
 {
 	return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
 		       IN_Q_OVERFLOW);
 }
 
-/* intofiy userspace file descriptor functions */
-static unsigned int inotify_poll(struct file *file, poll_table *wait)
+/* inotify userspace file descriptor functions */
+static __poll_t inotify_poll(struct file *file, poll_table *wait)
 {
 	struct fsnotify_group *group = file->private_data;
-	int ret = 0;
+	__poll_t ret = 0;
 
 	poll_wait(file, &group->notification_waitq, wait);
 	spin_lock(&group->notification_lock);
 	if (!fsnotify_notify_queue_is_empty(group))
-		ret = POLLIN | POLLRDNORM;
+		ret = EPOLLIN | EPOLLRDNORM;
 	spin_unlock(&group->notification_lock);
 
 	return ret;
@@ -144,10 +173,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 	size_t event_size = sizeof(struct inotify_event);
 	struct fsnotify_event *event;
 
-	if (fsnotify_notify_queue_is_empty(group))
-		return NULL;
-
 	event = fsnotify_peek_first_event(group);
+	if (!event)
+		return NULL;
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
@@ -188,7 +216,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	 */
 	pad_name_len = round_event_name_len(fsn_event);
 	inotify_event.len = pad_name_len;
-	inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
+	inotify_event.mask = inotify_mask_to_arg(event->mask);
 	inotify_event.wd = event->wd;
 	inotify_event.cookie = event->sync_cookie;
 
@@ -307,6 +335,20 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 		spin_unlock(&group->notification_lock);
 		ret = put_user(send_len, (int __user *) p);
 		break;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	case INOTIFY_IOC_SETNEXTWD:
+		ret = -EINVAL;
+		if (arg >= 1 && arg <= INT_MAX) {
+			struct inotify_group_private_data *data;
+
+			data = &group->inotify_data;
+			spin_lock(&data->idr_lock);
+			idr_set_cursor(&data->idr, (unsigned int)arg);
+			spin_unlock(&data->idr_lock);
+			ret = 0;
+		}
+		break;
+#endif /* CONFIG_CHECKPOINT_RESTORE */
 	}
 
 	return ret;
@@ -327,7 +369,8 @@ static const struct file_operations inotify_fops = {
 /*
  * find_inode - resolve a user-given path to a specific inode
  */
-static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
+static int inotify_find_inode(const char __user *dirname, struct path *path,
+						unsigned int flags, __u64 mask)
 {
 	int error;
 
@@ -335,9 +378,16 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
 	if (error)
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
-	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	error = path_permission(path, MAY_READ);
+	if (error) {
+		path_put(path);
+		return error;
+	}
+	error = security_path_notify(path, mask,
+				FSNOTIFY_OBJ_TYPE_INODE);
 	if (error)
 		path_put(path);
+
 	return error;
 }
 
@@ -376,7 +426,7 @@ static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group
 
 		fsnotify_get_mark(fsn_mark);
 		/* One ref for being in the idr, one ref we just took */
-		BUG_ON(atomic_read(&fsn_mark->refcnt) < 2);
+		BUG_ON(refcount_read(&fsn_mark->refcnt) < 2);
 	}
 
 	return i_mark;
@@ -446,7 +496,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 	 * One ref for being in the idr
 	 * one ref grabbed by inotify_idr_find
	 */
-	if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) {
+	if (unlikely(refcount_read(&i_mark->fsn_mark.refcnt) < 2)) {
 		printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n",
 			__func__, i_mark, i_mark->wd, i_mark->fsn_mark.group);
 		/* we can't really recover with bad ref cnting.. */
@@ -473,8 +523,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 	struct inotify_inode_mark *i_mark;
 
 	/* Queue ignore event for the watch */
-	inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
-			     NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL);
+	inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
+				   0);
 
 	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 	/* remove this mark from the idr */
@@ -490,24 +540,28 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	struct fsnotify_mark *fsn_mark;
 	struct inotify_inode_mark *i_mark;
 	__u32 old_mask, new_mask;
-	__u32 mask;
-	int add = (arg & IN_MASK_ADD);
+	int replace = !(arg & IN_MASK_ADD);
+	int create = (arg & IN_MASK_CREATE);
 	int ret;
 
-	mask = inotify_arg_to_mask(arg);
-
-	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
+	fsn_mark = fsnotify_find_inode_mark(inode, group);
 	if (!fsn_mark)
 		return -ENOENT;
+	else if (create) {
+		ret = -EEXIST;
+		goto out;
+	}
 
 	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
 	spin_lock(&fsn_mark->lock);
 	old_mask = fsn_mark->mask;
-	if (add)
-		fsn_mark->mask |= mask;
-	else
-		fsn_mark->mask = mask;
+	if (replace) {
+		fsn_mark->mask = 0;
+		fsn_mark->flags &= ~INOTIFY_MARK_FLAGS;
+	}
+	fsn_mark->mask |= inotify_arg_to_mask(inode, arg);
+	fsn_mark->flags |= inotify_arg_to_flags(arg);
 	new_mask = fsn_mark->mask;
 	spin_unlock(&fsn_mark->lock);
 
@@ -515,7 +569,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 		/* more bits in old than in new? */
 		int dropped = (old_mask & ~new_mask);
 		/* more bits in this fsn_mark than the inode's mask? */
-		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
+		int do_inode = (new_mask & ~READ_ONCE(inode->i_fsnotify_mask));
 
 		/* update the inode with this new fsn_mark */
 		if (dropped || do_inode)
@@ -526,6 +580,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	/* return the wd */
 	ret = i_mark->wd;
 
+out:
 	/* match the get from fsnotify_find_mark() */
 	fsnotify_put_mark(fsn_mark);
 
@@ -537,19 +592,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
 			     u32 arg)
 {
 	struct inotify_inode_mark *tmp_i_mark;
-	__u32 mask;
 	int ret;
 	struct idr *idr = &group->inotify_data.idr;
 	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
 
-	mask = inotify_arg_to_mask(arg);
-
 	tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
 	if (unlikely(!tmp_i_mark))
 		return -ENOMEM;
 
 	fsnotify_init_mark(&tmp_i_mark->fsn_mark, group);
-	tmp_i_mark->fsn_mark.mask = mask;
+	tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg);
+	tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg);
 	tmp_i_mark->wd = -1;
 
 	ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
@@ -564,7 +617,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
 	}
 
 	/* we are on the idr, now get on the inode */
-	ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, inode, NULL, 0);
+	ret = fsnotify_add_inode_mark_locked(&tmp_i_mark->fsn_mark, inode, 0);
 	if (ret) {
 		/* we failed to get on the inode, get off the idr */
 		inotify_remove_from_idr(group, tmp_i_mark);
@@ -586,13 +639,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
 {
 	int ret = 0;
 
-	mutex_lock(&group->mark_mutex);
+	fsnotify_group_lock(group);
 	/* try to update and existing watch with the new arg */
 	ret = inotify_update_existing_watch(group, inode, arg);
 	/* no mark present, try to add a new one */
 	if (ret == -ENOENT)
 		ret = inotify_new_watch(group, inode, arg);
-	mutex_unlock(&group->mark_mutex);
+	fsnotify_group_unlock(group);
 
 	return ret;
 }
@@ -602,22 +655,25 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 	struct fsnotify_group *group;
 	struct inotify_event_info *oevent;
 
-	group = fsnotify_alloc_group(&inotify_fsnotify_ops);
+	group = fsnotify_alloc_group(&inotify_fsnotify_ops,
+				     FSNOTIFY_GROUP_USER);
 	if (IS_ERR(group))
 		return group;
 
-	oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL);
+	oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT);
 	if (unlikely(!oevent)) {
 		fsnotify_destroy_group(group);
 		return ERR_PTR(-ENOMEM);
 	}
 	group->overflow_event = &oevent->fse;
-	fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW);
+	fsnotify_init_event(group->overflow_event);
+	oevent->mask = FS_Q_OVERFLOW;
 	oevent->wd = -1;
 	oevent->sync_cookie = 0;
 	oevent->name_len = 0;
 
 	group->max_events = max_events;
+	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
@@ -635,7 +691,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 }
 
 /* inotify syscalls */
-SYSCALL_DEFINE1(inotify_init1, int, flags)
+static int do_inotify_init(int flags)
 {
 	struct fsnotify_group *group;
 	int ret;
@@ -660,9 +716,14 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	return ret;
 }
 
+SYSCALL_DEFINE1(inotify_init1, int, flags)
+{
+	return do_inotify_init(flags);
+}
+
 SYSCALL_DEFINE0(inotify_init)
 {
-	return sys_inotify_init1(0);
+	return do_inotify_init(0);
 }
 
 SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
@@ -671,7 +732,6 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	struct fsnotify_group *group;
 	struct inode *inode;
 	struct path path;
-	struct fd f;
 	int ret;
 	unsigned flags = 0;
 
@@ -691,34 +751,35 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	if (unlikely(!(mask & ALL_INOTIFY_BITS)))
 		return -EINVAL;
 
-	f = fdget(fd);
-	if (unlikely(!f.file))
+	CLASS(fd, f)(fd);
+	if (fd_empty(f))
 		return -EBADF;
 
+	/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
+	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
+		return -EINVAL;
+
 	/* verify that this is indeed an inotify instance */
-	if (unlikely(f.file->f_op != &inotify_fops)) {
-		ret = -EINVAL;
-		goto fput_and_out;
-	}
+	if (unlikely(fd_file(f)->f_op != &inotify_fops))
+		return -EINVAL;
 
 	if (!(mask & IN_DONT_FOLLOW))
 		flags |= LOOKUP_FOLLOW;
 	if (mask & IN_ONLYDIR)
 		flags |= LOOKUP_DIRECTORY;
 
-	ret = inotify_find_inode(pathname, &path, flags);
+	ret = inotify_find_inode(pathname, &path, flags,
+				(mask & IN_ALL_EVENTS));
 	if (ret)
-		goto fput_and_out;
+		return ret;
 
 	/* inode held in place by reference to path; group by fget on fd */
 	inode = path.dentry->d_inode;
-	group = f.file->private_data;
+	group = fd_file(f)->private_data;
 
 	/* create/update an inode mark */
 	ret = inotify_update_watch(group, inode, mask);
 	path_put(&path);
-fput_and_out:
-	fdput(f);
 	return ret;
 }
 
@@ -726,35 +787,26 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
 	struct fsnotify_group *group;
 	struct inotify_inode_mark *i_mark;
-	struct fd f;
-	int ret = 0;
+	CLASS(fd, f)(fd);
 
-	f = fdget(fd);
-	if (unlikely(!f.file))
+	if (fd_empty(f))
 		return -EBADF;
 
 	/* verify that this is indeed an inotify instance */
-	ret = -EINVAL;
-	if (unlikely(f.file->f_op != &inotify_fops))
-		goto out;
+	if (unlikely(fd_file(f)->f_op != &inotify_fops))
+		return -EINVAL;
 
-	group = f.file->private_data;
+	group = fd_file(f)->private_data;
 
-	ret = -EINVAL;
 	i_mark = inotify_idr_find(group, wd);
 	if (unlikely(!i_mark))
-		goto out;
-
-	ret = 0;
+		return -EINVAL;
 
 	fsnotify_destroy_mark(&i_mark->fsn_mark, group);
 
 	/* match ref taken by inotify_idr_find */
 	fsnotify_put_mark(&i_mark->fsn_mark);
-
-out:
-	fdput(f);
-	return ret;
+	return 0;
 }
 
 /*
@@ -764,6 +816,18 @@ out:
  */
 static int __init inotify_user_setup(void)
 {
+	unsigned long watches_max;
+	struct sysinfo si;
+
+	si_meminfo(&si);
+	/*
+	 * Allow up to 1% of addressable memory to be allocated for inotify
+	 * watches (per user) limited to the range [8192, 1048576].
+	 */
+	watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
+			INOTIFY_WATCH_COST;
+	watches_max = clamp(watches_max, 8192UL, 1048576UL);
+
 	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
 	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
@@ -779,17 +843,17 @@ static int __init inotify_user_setup(void)
 	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
 	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
 	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
-	BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
 	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
-	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
 
-	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
+	BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
 
-	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
+	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
+					       SLAB_PANIC|SLAB_ACCOUNT);
 
 	inotify_max_queued_events = 16384;
 	init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
-	init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
+	init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
+	inotify_sysctls_init();
 
 	return 0;
 }
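
Note on the new max_user_watches default: instead of the old fixed 8192, inotify_user_setup() now sizes the per-user watch limit to 1% of addressable memory divided by the estimated per-watch cost, clamped to [8192, 1048576]. The sketch below is a hypothetical userspace illustration of that arithmetic only; it uses sysinfo(2) rather than the kernel's si_meminfo(), ignores the highmem subtraction, and the 1 KiB per-watch figure is an assumed stand-in for INOTIFY_WATCH_COST.

/* build: cc -o watches_est watches_est.c */
#include <stdio.h>
#include <sys/sysinfo.h>

#define ASSUMED_WATCH_COST 1024UL	/* assumption standing in for INOTIFY_WATCH_COST */

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	if (v < lo)
		return lo;
	if (v > hi)
		return hi;
	return v;
}

int main(void)
{
	struct sysinfo si;
	unsigned long bytes, watches_max;

	if (sysinfo(&si) != 0)
		return 1;

	/* userspace sysinfo reports totalram in units of mem_unit bytes */
	bytes = si.totalram * si.mem_unit;

	/* 1% of memory divided by the assumed per-watch cost, then clamped */
	watches_max = (bytes / 100) / ASSUMED_WATCH_COST;
	watches_max = clamp_ul(watches_max, 8192UL, 1048576UL);

	printf("estimated default max_user_watches: %lu\n", watches_max);
	return 0;
}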
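Note on IN_MASK_CREATE: the inotify_update_existing_watch() and inotify_add_watch() changes above make a second add on an already-watched inode fail with EEXIST when IN_MASK_CREATE is set, and reject IN_MASK_ADD combined with IN_MASK_CREATE with EINVAL. A minimal, hypothetical userspace check of that behaviour, assuming /tmp exists and headers new enough to define IN_MASK_CREATE:

/* build: cc -o mask_create mask_create.c */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/inotify.h>

int main(void)
{
	int fd = inotify_init1(IN_CLOEXEC);
	if (fd < 0)
		return 1;

	/* first watch on the inode succeeds and returns a wd */
	int wd = inotify_add_watch(fd, "/tmp", IN_CREATE);
	printf("first add: wd=%d\n", wd);

	/* same inode again with IN_MASK_CREATE: expect -1 with errno == EEXIST */
	int wd2 = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_MASK_CREATE);
	printf("IN_MASK_CREATE on existing watch: %d (%s)\n",
	       wd2, wd2 < 0 ? strerror(errno) : "ok");

	/* IN_MASK_ADD and IN_MASK_CREATE together: expect -1 with errno == EINVAL */
	int wd3 = inotify_add_watch(fd, "/tmp",
				    IN_CREATE | IN_MASK_ADD | IN_MASK_CREATE);
	printf("IN_MASK_ADD|IN_MASK_CREATE: %d (%s)\n",
	       wd3, wd3 < 0 ? strerror(errno) : "ok");
	return 0;
}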
