diff options
Diffstat (limited to 'fs/notify/inotify/inotify_user.c')
| -rw-r--r-- | fs/notify/inotify/inotify_user.c | 213 |
1 files changed, 122 insertions, 91 deletions
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 105576daca4a..b372fb2c56bd 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/inotify_user.c - inotify support for userspace * @@ -10,16 +11,6 @@ * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * inotify was largely rewriten to make use of the fsnotify infrastructure - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/file.h> @@ -39,39 +30,52 @@ #include <linux/poll.h> #include <linux/wait.h> #include <linux/memcontrol.h> +#include <linux/security.h> #include "inotify.h" #include "../fdinfo.h" #include <asm/ioctls.h> +/* + * An inotify watch requires allocating an inotify_inode_mark structure as + * well as pinning the watched inode. Doubling the size of a VFS inode + * should be more than enough to cover the additional filesystem inode + * size increase. + */ +#define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \ + 2 * sizeof(struct inode)) + /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; -struct kmem_cache *inotify_inode_mark_cachep __read_mostly; +struct kmem_cache *inotify_inode_mark_cachep __ro_after_init; #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> -static int zero; +static long it_zero = 0; +static long it_int_max = INT_MAX; -struct ctl_table inotify_table[] = { +static const struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_user_watches", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_queued_events", @@ -79,35 +83,59 @@ struct ctl_table inotify_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = SYSCTL_ZERO }, - { } }; + +static void __init inotify_sysctls_init(void) +{ + register_sysctl("fs/inotify", inotify_table); +} + +#else +#define inotify_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ -static inline __u32 inotify_arg_to_mask(u32 arg) +static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) { __u32 mask; /* - * everything should accept their own ignored, cares about children, - * and should receive events when the inode is unmounted + * Everything should receive events when the inode is unmounted. + * All directories care about children. */ - mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT); + mask = (FS_UNMOUNT); + if (S_ISDIR(inode->i_mode)) + mask |= FS_EVENT_ON_CHILD; /* mask off the flags used to open the fd */ - mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); + mask |= (arg & INOTIFY_USER_MASK); return mask; } +#define INOTIFY_MARK_FLAGS \ + (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT) + +static inline unsigned int inotify_arg_to_flags(u32 arg) +{ + unsigned int flags = 0; + + if (arg & IN_EXCL_UNLINK) + flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK; + if (arg & IN_ONESHOT) + flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT; + + return flags; +} + static inline u32 inotify_mask_to_arg(__u32 mask) { return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | IN_Q_OVERFLOW); } -/* intofiy userspace file descriptor functions */ +/* inotify userspace file descriptor functions */ static __poll_t inotify_poll(struct file *file, poll_table *wait) { struct fsnotify_group *group = file->private_data; @@ -145,10 +173,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, size_t event_size = sizeof(struct inotify_event); struct fsnotify_event *event; - if (fsnotify_notify_queue_is_empty(group)) - return NULL; - event = fsnotify_peek_first_event(group); + if (!event) + return NULL; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -189,7 +216,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, */ pad_name_len = round_event_name_len(fsn_event); inotify_event.len = pad_name_len; - inotify_event.mask = inotify_mask_to_arg(fsn_event->mask); + inotify_event.mask = inotify_mask_to_arg(event->mask); inotify_event.wd = event->wd; inotify_event.cookie = event->sync_cookie; @@ -342,7 +369,8 @@ static const struct file_operations inotify_fops = { /* * find_inode - resolve a user-given path to a specific inode */ -static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags) +static int inotify_find_inode(const char __user *dirname, struct path *path, + unsigned int flags, __u64 mask) { int error; @@ -350,9 +378,16 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns if (error) return error; /* you can only watch an inode if you have read permissions on it */ - error = inode_permission(path->dentry->d_inode, MAY_READ); + error = path_permission(path, MAY_READ); + if (error) { + path_put(path); + return error; + } + error = security_path_notify(path, mask, + FSNOTIFY_OBJ_TYPE_INODE); if (error) path_put(path); + return error; } @@ -486,14 +521,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { struct inotify_inode_mark *i_mark; - struct fsnotify_iter_info iter_info = { }; - - fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE, - fsn_mark); /* Queue ignore event for the watch */ - inotify_handle_event(group, NULL, FS_IN_IGNORED, NULL, - FSNOTIFY_EVENT_NONE, NULL, 0, &iter_info); + inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL, + 0); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -509,27 +540,28 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; __u32 old_mask, new_mask; - __u32 mask; - int add = (arg & IN_MASK_ADD); + int replace = !(arg & IN_MASK_ADD); int create = (arg & IN_MASK_CREATE); int ret; - mask = inotify_arg_to_mask(arg); - - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); + fsn_mark = fsnotify_find_inode_mark(inode, group); if (!fsn_mark) return -ENOENT; - else if (create) - return -EEXIST; + else if (create) { + ret = -EEXIST; + goto out; + } i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); spin_lock(&fsn_mark->lock); old_mask = fsn_mark->mask; - if (add) - fsn_mark->mask |= mask; - else - fsn_mark->mask = mask; + if (replace) { + fsn_mark->mask = 0; + fsn_mark->flags &= ~INOTIFY_MARK_FLAGS; + } + fsn_mark->mask |= inotify_arg_to_mask(inode, arg); + fsn_mark->flags |= inotify_arg_to_flags(arg); new_mask = fsn_mark->mask; spin_unlock(&fsn_mark->lock); @@ -537,7 +569,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* more bits in old than in new? */ int dropped = (old_mask & ~new_mask); /* more bits in this fsn_mark than the inode's mask? */ - int do_inode = (new_mask & ~inode->i_fsnotify_mask); + int do_inode = (new_mask & ~READ_ONCE(inode->i_fsnotify_mask)); /* update the inode with this new fsn_mark */ if (dropped || do_inode) @@ -548,6 +580,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* return the wd */ ret = i_mark->wd; +out: /* match the get from fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); @@ -559,19 +592,17 @@ static int inotify_new_watch(struct fsnotify_group *group, u32 arg) { struct inotify_inode_mark *tmp_i_mark; - __u32 mask; int ret; struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; - mask = inotify_arg_to_mask(arg); - tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) return -ENOMEM; fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); - tmp_i_mark->fsn_mark.mask = mask; + tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg); + tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg); tmp_i_mark->wd = -1; ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); @@ -608,13 +639,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod { int ret = 0; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); /* try to update and existing watch with the new arg */ ret = inotify_update_existing_watch(group, inode, arg); /* no mark present, try to add a new one */ if (ret == -ENOENT) ret = inotify_new_watch(group, inode, arg); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); return ret; } @@ -624,17 +655,19 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) struct fsnotify_group *group; struct inotify_event_info *oevent; - group = fsnotify_alloc_group(&inotify_fsnotify_ops); + group = fsnotify_alloc_group(&inotify_fsnotify_ops, + FSNOTIFY_GROUP_USER); if (IS_ERR(group)) return group; - oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL); + oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT); if (unlikely(!oevent)) { fsnotify_destroy_group(group); return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + fsnotify_init_event(group->overflow_event); + oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; oevent->name_len = 0; @@ -699,7 +732,6 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; - struct fd f; int ret; unsigned flags = 0; @@ -719,8 +751,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (unlikely(!(mask & ALL_INOTIFY_BITS))) return -EINVAL; - f = fdget(fd); - if (unlikely(!f.file)) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EBADF; /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ @@ -728,29 +760,26 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, return -EINVAL; /* verify that this is indeed an inotify instance */ - if (unlikely(f.file->f_op != &inotify_fops)) { - ret = -EINVAL; - goto fput_and_out; - } + if (unlikely(fd_file(f)->f_op != &inotify_fops)) + return -EINVAL; if (!(mask & IN_DONT_FOLLOW)) flags |= LOOKUP_FOLLOW; if (mask & IN_ONLYDIR) flags |= LOOKUP_DIRECTORY; - ret = inotify_find_inode(pathname, &path, flags); + ret = inotify_find_inode(pathname, &path, flags, + (mask & IN_ALL_EVENTS)); if (ret) - goto fput_and_out; + return ret; /* inode held in place by reference to path; group by fget on fd */ inode = path.dentry->d_inode; - group = f.file->private_data; + group = fd_file(f)->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); path_put(&path); -fput_and_out: - fdput(f); return ret; } @@ -758,35 +787,26 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { struct fsnotify_group *group; struct inotify_inode_mark *i_mark; - struct fd f; - int ret = 0; + CLASS(fd, f)(fd); - f = fdget(fd); - if (unlikely(!f.file)) + if (fd_empty(f)) return -EBADF; /* verify that this is indeed an inotify instance */ - ret = -EINVAL; - if (unlikely(f.file->f_op != &inotify_fops)) - goto out; + if (unlikely(fd_file(f)->f_op != &inotify_fops)) + return -EINVAL; - group = f.file->private_data; + group = fd_file(f)->private_data; - ret = -EINVAL; i_mark = inotify_idr_find(group, wd); if (unlikely(!i_mark)) - goto out; - - ret = 0; + return -EINVAL; fsnotify_destroy_mark(&i_mark->fsn_mark, group); /* match ref taken by inotify_idr_find */ fsnotify_put_mark(&i_mark->fsn_mark); - -out: - fdput(f); - return ret; + return 0; } /* @@ -796,6 +816,18 @@ out: */ static int __init inotify_user_setup(void) { + unsigned long watches_max; + struct sysinfo si; + + si_meminfo(&si); + /* + * Allow up to 1% of addressable memory to be allocated for inotify + * watches (per user) limited to the range [8192, 1048576]. + */ + watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / + INOTIFY_WATCH_COST; + watches_max = clamp(watches_max, 8192UL, 1048576UL); + BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); @@ -811,9 +843,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); - BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); - BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); @@ -822,7 +852,8 @@ static int __init inotify_user_setup(void) inotify_max_queued_events = 16384; init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; - init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192; + init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max; + inotify_sysctls_init(); return 0; } |
