diff options
Diffstat (limited to 'fs/notify')
-rw-r--r-- | fs/notify/dnotify/dnotify.c | 20 | ||||
-rw-r--r-- | fs/notify/fanotify/Kconfig | 1 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 71 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.h | 42 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 549 | ||||
-rw-r--r-- | fs/notify/fdinfo.c | 29 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 232 | ||||
-rw-r--r-- | fs/notify/fsnotify.h | 50 | ||||
-rw-r--r-- | fs/notify/group.c | 11 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 2 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 50 | ||||
-rw-r--r-- | fs/notify/mark.c | 228 |
12 files changed, 875 insertions, 410 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 3464fa7e8538..c4cdaf5fa7ed 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -16,12 +16,11 @@ #include <linux/security.h> #include <linux/spinlock.h> #include <linux/slab.h> -#include <linux/fdtable.h> #include <linux/fsnotify_backend.h> static int dir_notify_enable __read_mostly = 1; #ifdef CONFIG_SYSCTL -static struct ctl_table dnotify_sysctls[] = { +static const struct ctl_table dnotify_sysctls[] = { { .procname = "dir-notify-enable", .data = &dir_notify_enable, @@ -110,7 +109,7 @@ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask, prev = &dn->dn_next; continue; } - fown = &dn->dn_filp->f_owner; + fown = file_f_owner(dn->dn_filp); send_sigio(fown, dn->dn_fd, POLL_MSG); if (dn->dn_mask & FS_DN_MULTISHOT) prev = &dn->dn_next; @@ -162,7 +161,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) if (!S_ISDIR(inode->i_mode)) return; - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); + fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); @@ -316,6 +315,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } + error = file_f_owner_allocate(filp); + if (error) + goto out_err; + /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; fsnotify_init_mark(new_fsn_mark, dnotify_group); @@ -326,7 +329,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) fsnotify_group_lock(dnotify_group); /* add the new_fsn_mark or find an old one. */ - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); + fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); if (fsn_mark) { dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); @@ -343,9 +346,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) new_fsn_mark = NULL; } - rcu_read_lock(); - f = lookup_fdget_rcu(fd); - rcu_read_unlock(); + f = fget_raw(fd); /* if (f != filp) means that we lost a race and another task/thread * actually closed the fd we are still playing with before we grabbed @@ -402,8 +403,7 @@ static int __init dnotify_init(void) SLAB_PANIC|SLAB_ACCOUNT); dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, - FSNOTIFY_GROUP_NOFS); + dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, 0); if (IS_ERR(dnotify_group)) panic("unable to allocate fsnotify group for dnotify\n"); dnotify_sysctl_init(); diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig index a511f9d8677b..0e36aaf379b7 100644 --- a/fs/notify/fanotify/Kconfig +++ b/fs/notify/fanotify/Kconfig @@ -15,7 +15,6 @@ config FANOTIFY config FANOTIFY_ACCESS_PERMISSIONS bool "fanotify permissions checking" depends on FANOTIFY - depends on SECURITY default n help Say Y here is you want fanotify listeners to be able to make permissions diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 224bccaab4cc..3083643b864b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/fanotify.h> -#include <linux/fdtable.h> #include <linux/fsnotify_backend.h> #include <linux/init.h> #include <linux/jiffies.h> @@ -167,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old, case FANOTIFY_EVENT_TYPE_FS_ERROR: return fanotify_error_event_equal(FANOTIFY_EE(old), FANOTIFY_EE(new)); + case FANOTIFY_EVENT_TYPE_MNT: + return false; default: WARN_ON_ONCE(1); } @@ -224,7 +225,7 @@ static int fanotify_get_response(struct fsnotify_group *group, struct fanotify_perm_event *event, struct fsnotify_iter_info *iter_info) { - int ret; + int ret, errno; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -263,14 +264,23 @@ static int fanotify_get_response(struct fsnotify_group *group, ret = 0; break; case FAN_DENY: + /* Check custom errno from pre-content events */ + errno = fanotify_get_response_errno(event->response); + if (errno) { + ret = -errno; + break; + } + fallthrough; default: ret = -EPERM; } /* Check if the response should be audited */ - if (event->response & FAN_AUDIT) - audit_fanotify(event->response & ~FAN_AUDIT, - &event->audit_rule); + if (event->response & FAN_AUDIT) { + u32 response = event->response & + (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS); + audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule); + } pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, group, event, ret); @@ -304,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", __func__, iter_info->report_mask, event_mask, data, data_type); - if (!fid_mode) { + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { + if (data_type != FSNOTIFY_EVENT_MNT) + return 0; + } else if (!fid_mode) { /* Do we have path to open a file descriptor? */ if (!path) return 0; @@ -402,7 +415,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, { int dwords, type = 0; char *ext_buf = NULL; - void *buf = fh->buf; + void *buf = fh + 1; int err; fh->type = FILEID_ROOT; @@ -549,9 +562,27 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, return &pevent->fae; } -static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, +static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp) +{ + struct fanotify_mnt_event *pevent; + + pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp); + if (!pevent) + return NULL; + + pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT; + pevent->mnt_id = mnt_id; + + return &pevent->fae; +} + +static struct fanotify_event *fanotify_alloc_perm_event(const void *data, + int data_type, gfp_t gfp) { + const struct path *path = fsnotify_data_path(data, data_type); + const struct file_range *range = + fsnotify_data_file_range(data, data_type); struct fanotify_perm_event *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); @@ -565,6 +596,9 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, pevent->hdr.len = 0; pevent->state = FAN_EVENT_INIT; pevent->path = *path; + /* NULL ppos means no range info */ + pevent->ppos = range ? &range->pos : NULL; + pevent->count = range ? range->count : 0; path_get(path); return &pevent->fae; @@ -716,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event( fid_mode); struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); const struct path *path = fsnotify_data_path(data, data_type); + u64 mnt_id = fsnotify_data_mnt_id(data, data_type); struct mem_cgroup *old_memcg; struct dentry *moved = NULL; struct inode *child = NULL; @@ -802,7 +837,7 @@ static struct fanotify_event *fanotify_alloc_event( old_memcg = set_active_memcg(group->memcg); if (fanotify_is_perm_event(mask)) { - event = fanotify_alloc_perm_event(path, gfp); + event = fanotify_alloc_perm_event(data, data_type, gfp); } else if (fanotify_is_error_event(mask)) { event = fanotify_alloc_error_event(group, fsid, data, data_type, &hash); @@ -811,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event( moved, &hash, gfp); } else if (fid_mode) { event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); - } else { + } else if (path) { event = fanotify_alloc_path_event(path, &hash, gfp); + } else if (mnt_id) { + event = fanotify_alloc_mnt_event(mnt_id, gfp); + } else { + WARN_ON_ONCE(1); } if (!event) @@ -910,8 +949,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); BUILD_BUG_ON(FAN_RENAME != FS_RENAME); + BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24); mask = fanotify_group_event_mask(group, iter_info, &match_mask, mask, data, data_type, dir); @@ -969,6 +1009,7 @@ finish: static void fanotify_free_group_priv(struct fsnotify_group *group) { + put_user_ns(group->user_ns); kfree(group->fanotify_data.merge_hash); if (group->fanotify_data.ucounts) dec_ucount(group->fanotify_data.ucounts, @@ -1012,6 +1053,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group, mempool_free(fee, &group->fanotify_data.error_events_pool); } +static void fanotify_free_mnt_event(struct fanotify_event *event) +{ + kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event)); +} + static void fanotify_free_event(struct fsnotify_group *group, struct fsnotify_event *fsn_event) { @@ -1038,6 +1084,9 @@ static void fanotify_free_event(struct fsnotify_group *group, case FANOTIFY_EVENT_TYPE_FS_ERROR: fanotify_free_error_event(group, event); break; + case FANOTIFY_EVENT_TYPE_MNT: + fanotify_free_mnt_event(event); + break; default: WARN_ON_ONCE(1); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index e5ab33cae6a7..b78308975082 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_fid_event_cachep; extern struct kmem_cache *fanotify_path_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; +extern struct kmem_cache *fanotify_mnt_event_cachep; /* Possible states of the permission event */ enum { @@ -24,7 +25,7 @@ enum { * stored in either the first or last 2 dwords. */ #define FANOTIFY_INLINE_FH_LEN (3 << 2) -#define FANOTIFY_FH_HDR_LEN offsetof(struct fanotify_fh, buf) +#define FANOTIFY_FH_HDR_LEN sizeof(struct fanotify_fh) /* Fixed size struct for file handle */ struct fanotify_fh { @@ -33,7 +34,6 @@ struct fanotify_fh { #define FANOTIFY_FH_FLAG_EXT_BUF 1 u8 flags; u8 pad; - unsigned char buf[]; } __aligned(4); /* Variable size struct for dir file handle + child file handle + name */ @@ -91,7 +91,7 @@ static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh) BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4); BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) > FANOTIFY_INLINE_FH_LEN); - return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *)); + return (char **)ALIGN((unsigned long)(fh + 1), __alignof__(char *)); } static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) @@ -101,7 +101,7 @@ static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) static inline void *fanotify_fh_buf(struct fanotify_fh *fh) { - return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf; + return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh + 1; } static inline int fanotify_info_dir_fh_len(struct fanotify_info *info) @@ -244,6 +244,7 @@ enum fanotify_event_type { FANOTIFY_EVENT_TYPE_PATH_PERM, FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */ + FANOTIFY_EVENT_TYPE_MNT, __FANOTIFY_EVENT_TYPE_NUM }; @@ -276,7 +277,7 @@ static inline void fanotify_init_event(struct fanotify_event *event, #define FANOTIFY_INLINE_FH(name, size) \ struct { \ struct fanotify_fh name; \ - /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \ + /* Space for filehandle - access with fanotify_fh_buf() */ \ unsigned char _inline_fh_buf[size]; \ } @@ -409,12 +410,23 @@ struct fanotify_path_event { struct path path; }; +struct fanotify_mnt_event { + struct fanotify_event fae; + u64 mnt_id; +}; + static inline struct fanotify_path_event * FANOTIFY_PE(struct fanotify_event *event) { return container_of(event, struct fanotify_path_event, fae); } +static inline struct fanotify_mnt_event * +FANOTIFY_ME(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_mnt_event, fae); +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the @@ -425,6 +437,8 @@ FANOTIFY_PE(struct fanotify_event *event) struct fanotify_perm_event { struct fanotify_event fae; struct path path; + const loff_t *ppos; /* optional file range info */ + size_t count; u32 response; /* userspace answer to the event */ unsigned short state; /* state of the event */ int fd; /* fd we passed to userspace for this event */ @@ -446,6 +460,14 @@ static inline bool fanotify_is_perm_event(u32 mask) mask & FANOTIFY_PERM_EVENTS; } +static inline bool fanotify_event_has_access_range(struct fanotify_event *event) +{ + if (!(event->mask & FANOTIFY_PRE_CONTENT_EVENTS)) + return false; + + return FANOTIFY_PERM(event)->ppos; +} + static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) { return container_of(fse, struct fanotify_event, fse); @@ -456,6 +478,11 @@ static inline bool fanotify_is_error_event(u32 mask) return mask & FAN_FS_ERROR; } +static inline bool fanotify_is_mnt_event(u32 mask) +{ + return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH); +} + static inline const struct path *fanotify_event_path(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_PATH) @@ -518,3 +545,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) return mflags; } + +static inline u32 fanotify_get_response_errno(int res) +{ + return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK; +} diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index fbdc63cc10d9..b192ee068a7a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/fanotify.h> #include <linux/fcntl.h> -#include <linux/fdtable.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/anon_inodes.h> @@ -59,7 +58,7 @@ static int fanotify_max_queued_events __read_mostly; static long ft_zero = 0; static long ft_int_max = INT_MAX; -static struct ctl_table fanotify_table[] = { +static const struct ctl_table fanotify_table[] = { { .procname = "max_user_groups", .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS], @@ -101,8 +100,7 @@ static void __init fanotify_sysctls_init(void) * * Internal and external open flags are stored together in field f_flags of * struct file. Only external open flags shall be allowed in event_f_flags. - * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be - * excluded. + * Internal flags like FMODE_EXEC shall be excluded. */ #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ O_ACCMODE | O_APPEND | O_NONBLOCK | \ @@ -115,14 +113,19 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init; struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; struct kmem_cache *fanotify_path_event_cachep __ro_after_init; struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; +struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init; #define FANOTIFY_EVENT_ALIGN 4 #define FANOTIFY_FID_INFO_HDR_LEN \ (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) -#define FANOTIFY_PIDFD_INFO_HDR_LEN \ +#define FANOTIFY_PIDFD_INFO_LEN \ sizeof(struct fanotify_event_info_pidfd) #define FANOTIFY_ERROR_INFO_LEN \ (sizeof(struct fanotify_event_info_error)) +#define FANOTIFY_RANGE_INFO_LEN \ + (sizeof(struct fanotify_event_info_range)) +#define FANOTIFY_MNT_INFO_LEN \ + (sizeof(struct fanotify_event_info_mnt)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -160,9 +163,6 @@ static size_t fanotify_event_len(unsigned int info_mode, int fh_len; int dot_len = 0; - if (!info_mode) - return event_len; - if (fanotify_is_error_event(event->mask)) event_len += FANOTIFY_ERROR_INFO_LEN; @@ -177,13 +177,18 @@ static size_t fanotify_event_len(unsigned int info_mode, dot_len = 1; } - if (info_mode & FAN_REPORT_PIDFD) - event_len += FANOTIFY_PIDFD_INFO_HDR_LEN; - if (fanotify_event_has_object_fh(event)) { fh_len = fanotify_event_object_fh_len(event); event_len += fanotify_fid_info_len(fh_len, dot_len); } + if (fanotify_is_mnt_event(event->mask)) + event_len += FANOTIFY_MNT_INFO_LEN; + + if (info_mode & FAN_REPORT_PIDFD) + event_len += FANOTIFY_PIDFD_INFO_LEN; + + if (fanotify_event_has_access_range(event)) + event_len += FANOTIFY_RANGE_INFO_LEN; return event_len; } @@ -259,20 +264,12 @@ static int create_fd(struct fsnotify_group *group, const struct path *path, return client_fd; /* - * we need a new file handle for the userspace program so it can read even if it was - * originally opened O_WRONLY. + * We provide an fd for the userspace program, so it could access the + * file without generating fanotify events itself. */ - new_file = dentry_open(path, - group->fanotify_data.f_flags | __FMODE_NONOTIFY, - current_cred()); + new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags, + current_cred()); if (IS_ERR(new_file)) { - /* - * we still send an event even if we can't open the file. this - * can happen when say tasks are gone and we try to open their - * /proc files or we try to open a WRONLY file like in sysfs - * we just send the errno to userspace since there isn't much - * else we can do. - */ put_unused_fd(client_fd); client_fd = PTR_ERR(new_file); } else { @@ -335,11 +332,12 @@ static int process_access_response(struct fsnotify_group *group, struct fanotify_perm_event *event; int fd = response_struct->fd; u32 response = response_struct->response; + int errno = fanotify_get_response_errno(response); int ret = info_len; struct fanotify_response_info_audit_rule friar; - pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__, - group, fd, response, info, info_len); + pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n", + __func__, group, fd, response, errno, info, info_len); /* * make sure the response is valid, if invalid we do nothing and either * userspace can send a valid response or we will clean it up after the @@ -350,7 +348,31 @@ static int process_access_response(struct fsnotify_group *group, switch (response & FANOTIFY_RESPONSE_ACCESS) { case FAN_ALLOW: + if (errno) + return -EINVAL; + break; case FAN_DENY: + /* Custom errno is supported only for pre-content groups */ + if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT) + return -EINVAL; + + /* + * Limit errno to values expected on open(2)/read(2)/write(2) + * of regular files. + */ + switch (errno) { + case 0: + case EIO: + case EPERM: + case EBUSY: + case ETXTBSY: + case EAGAIN: + case ENOSPC: + case EDQUOT: + break; + default: + return -EINVAL; + } break; default: return -EINVAL; @@ -388,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } +static size_t copy_mnt_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_event_info_mnt info = { }; + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT; + info.hdr.len = FANOTIFY_MNT_INFO_LEN; + + if (WARN_ON(count < info.hdr.len)) + return -EFAULT; + + info.mnt_id = FANOTIFY_ME(event)->mnt_id; + + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + return info.hdr.len; +} + static size_t copy_error_info_to_user(struct fanotify_event *event, char __user *buf, int count) { @@ -514,7 +555,7 @@ static int copy_pidfd_info_to_user(int pidfd, size_t count) { struct fanotify_event_info_pidfd info = { }; - size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN; + size_t info_len = FANOTIFY_PIDFD_INFO_LEN; if (WARN_ON_ONCE(info_len > count)) return -EFAULT; @@ -529,6 +570,30 @@ static int copy_pidfd_info_to_user(int pidfd, return info_len; } +static size_t copy_range_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_perm_event *pevent = FANOTIFY_PERM(event); + struct fanotify_event_info_range info = { }; + size_t info_len = FANOTIFY_RANGE_INFO_LEN; + + if (WARN_ON_ONCE(info_len > count)) + return -EFAULT; + + if (WARN_ON_ONCE(!pevent->ppos)) + return -EINVAL; + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE; + info.hdr.len = info_len; + info.offset = *(pevent->ppos); + info.count = pevent->count; + + if (copy_to_user(buf, &info, info_len)) + return -EFAULT; + + return info_len; +} + static int copy_info_records_to_user(struct fanotify_event *event, struct fanotify_info *info, unsigned int info_mode, int pidfd, @@ -650,6 +715,24 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_event_has_access_range(event)) { + ret = copy_range_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + + if (fanotify_is_mnt_event(event->mask)) { + ret = copy_mnt_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + return total_bytes; } @@ -663,7 +746,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; struct file *f = NULL, *pidfd_file = NULL; - int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD; + int ret, pidfd = -ESRCH, fd = -EBADF; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -691,10 +774,39 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && path && path->mnt && path->dentry) { fd = create_fd(group, path, &f); - if (fd < 0) - return fd; + /* + * Opening an fd from dentry can fail for several reasons. + * For example, when tasks are gone and we try to open their + * /proc files or we try to open a WRONLY file like in sysfs + * or when trying to open a file that was deleted on the + * remote network server. + * + * For a group with FAN_REPORT_FD_ERROR, we will send the + * event with the error instead of the open fd, otherwise + * Userspace may not get the error at all. + * In any case, userspace will not know which file failed to + * open, so add a debug print for further investigation. + */ + if (fd < 0) { + pr_debug("fanotify: create_fd(%pd2) failed err=%d\n", + path->dentry, fd); + if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) { + /* + * Historically, we've handled EOPENSTALE in a + * special way and silently dropped such + * events. Now we have to keep it to maintain + * backward compatibility... + */ + if (fd == -EOPENSTALE) + fd = 0; + return fd; + } + } } - metadata.fd = fd; + if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) + metadata.fd = fd; + else + metadata.fd = fd >= 0 ? fd : FAN_NOFD; if (pidfd_mode) { /* @@ -709,18 +821,16 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, * The PIDTYPE_TGID check for an event->pid is performed * preemptively in an attempt to catch out cases where the event * listener reads events after the event generating process has - * already terminated. Report FAN_NOPIDFD to the event listener - * in those cases, with all other pidfd creation errors being - * reported as FAN_EPIDFD. + * already terminated. Depending on flag FAN_REPORT_FD_ERROR, + * report either -ESRCH or FAN_NOPIDFD to the event listener in + * those cases with all other pidfd creation errors reported as + * the error code itself or as FAN_EPIDFD. */ - if (metadata.pid == 0 || - !pid_has_task(event->pid, PIDTYPE_TGID)) { - pidfd = FAN_NOPIDFD; - } else { + if (metadata.pid && pid_has_task(event->pid, PIDTYPE_TGID)) pidfd = pidfd_prepare(event->pid, 0, &pidfd_file); - if (pidfd < 0) - pidfd = FAN_EPIDFD; - } + + if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR) && pidfd < 0) + pidfd = pidfd == -ESRCH ? FAN_NOPIDFD : FAN_EPIDFD; } ret = -EFAULT; @@ -737,15 +847,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, buf += FAN_EVENT_METADATA_LEN; count -= FAN_EVENT_METADATA_LEN; - if (fanotify_is_perm_event(event->mask)) - FANOTIFY_PERM(event)->fd = fd; - - if (info_mode) { - ret = copy_info_records_to_user(event, info, info_mode, pidfd, - buf, count); - if (ret < 0) - goto out_close_fd; - } + ret = copy_info_records_to_user(event, info, info_mode, pidfd, + buf, count); + if (ret < 0) + goto out_close_fd; if (f) fd_install(fd, f); @@ -753,15 +858,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (pidfd_file) fd_install(pidfd, pidfd_file); + if (fanotify_is_perm_event(event->mask)) + FANOTIFY_PERM(event)->fd = fd; + return metadata.event_len; out_close_fd: - if (fd != FAN_NOFD) { + if (f) { put_unused_fd(fd); fput(f); } - if (pidfd >= 0) { + if (pidfd_file) { put_unused_fd(pidfd); fput(pidfd_file); } @@ -828,15 +936,6 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, } ret = copy_event_to_user(group, event, buf, count); - if (unlikely(ret == -EOPENSTALE)) { - /* - * We cannot report events with stale fd so drop it. - * Setting ret to 0 will continue the event loop and - * do the right thing if there are no more events to - * read (i.e. return bytes read, -EAGAIN or wait). - */ - ret = 0; - } /* * Permission events get queued to wait for response. Other @@ -845,7 +944,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, if (!fanotify_is_perm_event(event->mask)) { fsnotify_destroy_event(group, &event->fse); } else { - if (ret <= 0) { + if (ret <= 0 || FANOTIFY_PERM(event)->fd < 0) { spin_lock(&group->notification_lock); finish_permission_event(group, FANOTIFY_PERM(event), FAN_DENY, NULL); @@ -1003,22 +1102,17 @@ static int fanotify_find_path(int dfd, const char __user *filename, dfd, filename, flags); if (filename == NULL) { - struct fd f = fdget(dfd); + CLASS(fd, f)(dfd); - ret = -EBADF; - if (!f.file) - goto out; + if (fd_empty(f)) + return -EBADF; - ret = -ENOTDIR; if ((flags & FAN_MARK_ONLYDIR) && - !(S_ISDIR(file_inode(f.file)->i_mode))) { - fdput(f); - goto out; - } + !(S_ISDIR(file_inode(fd_file(f))->i_mode))) + return -ENOTDIR; - *path = f.file->f_path; + *path = fd_file(f)->f_path; path_get(path); - fdput(f); } else { unsigned int lookup_flags = 0; @@ -1076,7 +1170,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, } static int fanotify_remove_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, __u32 mask, + void *obj, unsigned int obj_type, __u32 mask, unsigned int flags, __u32 umask) { struct fsnotify_mark *fsn_mark = NULL; @@ -1084,7 +1178,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group, int destroy_mark; fsnotify_group_lock(group); - fsn_mark = fsnotify_find_mark(connp, group); + fsn_mark = fsnotify_find_mark(obj, obj_type, group); if (!fsn_mark) { fsnotify_group_unlock(group); return -ENOENT; @@ -1105,30 +1199,6 @@ static int fanotify_remove_mark(struct fsnotify_group *group, return 0; } -static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - mask, flags, umask); -} - -static int fanotify_remove_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, - flags, umask); -} - -static int fanotify_remove_inode_mark(struct fsnotify_group *group, - struct inode *inode, __u32 mask, - unsigned int flags, __u32 umask) -{ - return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask, - flags, umask); -} - static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, unsigned int fan_flags) { @@ -1249,7 +1319,7 @@ static int fanotify_set_mark_fsid(struct fsnotify_group *group, } static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, + void *obj, unsigned int obj_type, unsigned int fan_flags, struct fan_fsid *fsid) @@ -1264,6 +1334,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, * A group with FAN_UNLIMITED_MARKS does not contribute to mark count * in the limited groups account. */ + BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS)); if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) return ERR_PTR(-ENOSPC); @@ -1288,7 +1359,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; } - ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0); + ret = fsnotify_add_mark_locked(mark, obj, obj_type, 0); if (ret) goto out_put_mark; @@ -1313,7 +1384,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group) } static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, - unsigned int fan_flags) + __u32 mask, unsigned int fan_flags) { /* * Non evictable mark cannot be downgraded to evictable mark. @@ -1340,11 +1411,16 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) return -EEXIST; + /* For now pre-content events are not generated for directories */ + mask |= fsn_mark->mask; + if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) + return -EEXIST; + return 0; } static int fanotify_add_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, unsigned int obj_type, + void *obj, unsigned int obj_type, __u32 mask, unsigned int fan_flags, struct fan_fsid *fsid) { @@ -1353,9 +1429,9 @@ static int fanotify_add_mark(struct fsnotify_group *group, int ret = 0; fsnotify_group_lock(group); - fsn_mark = fsnotify_find_mark(connp, group); + fsn_mark = fsnotify_find_mark(obj, obj_type, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, obj_type, + fsn_mark = fanotify_add_new_mark(group, obj, obj_type, fan_flags, fsid); if (IS_ERR(fsn_mark)) { fsnotify_group_unlock(group); @@ -1366,7 +1442,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, /* * Check if requested mark flags conflict with an existing mark flags. */ - ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags); + ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags); if (ret) goto out; @@ -1392,42 +1468,6 @@ out: return ret; } -static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); -} - -static int fanotify_add_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - return fanotify_add_mark(group, &sb->s_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); -} - -static int fanotify_add_inode_mark(struct fsnotify_group *group, - struct inode *inode, __u32 mask, - unsigned int flags, struct fan_fsid *fsid) -{ - pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); - - /* - * If some other task has this inode open for write we should not add - * an ignore mask, unless that ignore mask is supposed to survive - * modification changes anyway. - */ - if ((flags & FANOTIFY_MARK_IGNORE_BITS) && - !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && - inode_is_open_for_write(inode)) - return 0; - - return fanotify_add_mark(group, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); -} - static struct fsnotify_event *fanotify_alloc_overflow_event(void) { struct fanotify_event *oevent; @@ -1459,11 +1499,13 @@ static struct hlist_head *fanotify_alloc_merge_hash(void) /* fanotify syscalls */ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { + struct user_namespace *user_ns = current_user_ns(); struct fsnotify_group *group; int f_flags, fd; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int internal_flags = 0; + struct file *file; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -1472,10 +1514,11 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) /* * An unprivileged user can setup an fanotify group with * limited functionality - an unprivileged group is limited to - * notification events with file handles and it cannot use - * unlimited queue/marks. + * notification events with file handles or mount ids and it + * cannot use unlimited queue/marks. */ - if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) + if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || + !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT))) return -EPERM; /* @@ -1501,6 +1544,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID)) return -EINVAL; + /* Don't allow mixing mnt events with inode events for now */ + if (flags & FAN_REPORT_MNT) { + if (class != FAN_CLASS_NOTIF) + return -EINVAL; + if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR)) + return -EINVAL; + } + if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) return -EINVAL; @@ -1532,7 +1583,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) return -EINVAL; - f_flags = O_RDWR | __FMODE_NONOTIFY; + f_flags = O_RDWR; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) @@ -1540,14 +1591,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ group = fsnotify_alloc_group(&fanotify_fsnotify_ops, - FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS); + FSNOTIFY_GROUP_USER); if (IS_ERR(group)) { return PTR_ERR(group); } /* Enforce groups limits per user in all containing user ns */ - group->fanotify_data.ucounts = inc_ucount(current_user_ns(), - current_euid(), + group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(), UCOUNT_FANOTIFY_GROUPS); if (!group->fanotify_data.ucounts) { fd = -EMFILE; @@ -1556,6 +1606,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.flags = flags | internal_flags; group->memcg = get_mem_cgroup_from_mm(current->mm); + group->user_ns = get_user_ns(user_ns); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); if (!group->fanotify_data.merge_hash) { @@ -1576,44 +1627,44 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) INIT_LIST_HEAD(&group->fanotify_data.access_list); switch (class) { case FAN_CLASS_NOTIF: - group->priority = FS_PRIO_0; + group->priority = FSNOTIFY_PRIO_NORMAL; break; case FAN_CLASS_CONTENT: - group->priority = FS_PRIO_1; + group->priority = FSNOTIFY_PRIO_CONTENT; break; case FAN_CLASS_PRE_CONTENT: - group->priority = FS_PRIO_2; + group->priority = FSNOTIFY_PRIO_PRE_CONTENT; break; default: fd = -EINVAL; goto out_destroy_group; } + BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE)); if (flags & FAN_UNLIMITED_QUEUE) { - fd = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out_destroy_group; group->max_events = UINT_MAX; } else { group->max_events = fanotify_max_queued_events; } - if (flags & FAN_UNLIMITED_MARKS) { - fd = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out_destroy_group; - } - if (flags & FAN_ENABLE_AUDIT) { fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) goto out_destroy_group; } - fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); + fd = get_unused_fd_flags(f_flags); if (fd < 0) goto out_destroy_group; + file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, + f_flags, FMODE_NONOTIFY); + if (IS_ERR(file)) { + put_unused_fd(fd); + fd = PTR_ERR(file); + goto out_destroy_group; + } + fd_install(fd, file); return fd; out_destroy_group: @@ -1693,12 +1744,24 @@ static int fanotify_events_supported(struct fsnotify_group *group, unsigned int flags) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + bool is_dir = d_is_dir(path->dentry); /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || (mask & FAN_RENAME) || (flags & FAN_MARK_IGNORE); /* + * Filesystems need to opt-into pre-content evnets (a.k.a HSM) + * and they are only supported on regular files and directories. + */ + if (mask & FANOTIFY_PRE_CONTENT_EVENTS) { + if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM)) + return -EOPNOTSUPP; + if (!is_dir && !d_is_reg(path->dentry)) + return -EINVAL; + } + + /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of * deadlocking the system - open done when reporting fanotify event @@ -1730,7 +1793,7 @@ static int fanotify_events_supported(struct fsnotify_group *group, * but because we always allowed it, error only when using new APIs. */ if (strict_dir_events && mark_type == FAN_MARK_INODE && - !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) + !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) return -ENOTDIR; return 0; @@ -1740,16 +1803,17 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { struct inode *inode = NULL; - struct vfsmount *mnt = NULL; struct fsnotify_group *group; - struct fd f; struct path path; struct fan_fsid __fsid, *fsid = NULL; + struct user_namespace *user_ns = NULL; + struct mnt_namespace *mntns; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; unsigned int obj_type, fid_mode; + void *obj = NULL; u32 umask = 0; int ret; @@ -1773,6 +1837,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, case FAN_MARK_FILESYSTEM: obj_type = FSNOTIFY_OBJ_TYPE_SB; break; + case FAN_MARK_MNTNS: + obj_type = FSNOTIFY_OBJ_TYPE_MNTNS; + break; default: return -EINVAL; } @@ -1811,39 +1878,50 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, umask = FANOTIFY_EVENT_FLAGS; } - f = fdget(fanotify_fd); - if (unlikely(!f.file)) + CLASS(fd, f)(fanotify_fd); + if (fd_empty(f)) return -EBADF; /* verify that this is indeed an fanotify instance */ - ret = -EINVAL; - if (unlikely(f.file->f_op != &fanotify_fops)) - goto fput_and_out; - group = f.file->private_data; + if (unlikely(fd_file(f)->f_op != &fanotify_fops)) + return -EINVAL; + group = fd_file(f)->private_data; + + /* Only report mount events on mnt namespace */ + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { + if (mask & ~FANOTIFY_MOUNT_EVENTS) + return -EINVAL; + if (mark_type != FAN_MARK_MNTNS) + return -EINVAL; + } else { + if (mask & FANOTIFY_MOUNT_EVENTS) + return -EINVAL; + if (mark_type == FAN_MARK_MNTNS) + return -EINVAL; + } /* - * An unprivileged user is not allowed to setup mount nor filesystem - * marks. This also includes setting up such marks by a group that - * was initialized by an unprivileged user. + * A user is allowed to setup sb/mount/mntns marks only if it is + * capable in the user ns where the group was created. */ - ret = -EPERM; - if ((!capable(CAP_SYS_ADMIN) || - FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && + if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) && mark_type != FAN_MARK_INODE) - goto fput_and_out; + return -EPERM; /* - * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not - * allowed to set permissions events. + * Permission events are not allowed for FAN_CLASS_NOTIF. + * Pre-content permission events are not allowed for FAN_CLASS_CONTENT. */ - ret = -EINVAL; if (mask & FANOTIFY_PERM_EVENTS && - group->priority == FS_PRIO_0) - goto fput_and_out; + group->priority == FSNOTIFY_PRIO_NORMAL) + return -EINVAL; + else if (mask & FANOTIFY_PRE_CONTENT_EVENTS && + group->priority == FSNOTIFY_PRIO_CONTENT) + return -EINVAL; if (mask & FAN_FS_ERROR && mark_type != FAN_MARK_FILESYSTEM) - goto fput_and_out; + return -EINVAL; /* * Evictable is only relevant for inode marks, because only inode object @@ -1851,7 +1929,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, */ if (flags & FAN_MARK_EVICTABLE && mark_type != FAN_MARK_INODE) - goto fput_and_out; + return -EINVAL; /* * Events that do not carry enough information to report @@ -1861,9 +1939,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * point. */ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); - if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && + if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) && (!fid_mode || mark_type == FAN_MARK_MOUNT)) - goto fput_and_out; + return -EINVAL; /* * FAN_RENAME uses special info type records to report the old and @@ -1871,23 +1949,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * useful and was not implemented. */ if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) - goto fput_and_out; + return -EINVAL; + + /* Pre-content events are not currently generated for directories. */ + if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) + return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { - ret = 0; - if (mark_type == FAN_MARK_MOUNT) - fsnotify_clear_vfsmount_marks_by_group(group); - else if (mark_type == FAN_MARK_FILESYSTEM) - fsnotify_clear_sb_marks_by_group(group); - else - fsnotify_clear_inode_marks_by_group(group); - goto fput_and_out; + fsnotify_clear_marks_by_group(group, obj_type); + return 0; } ret = fanotify_find_path(dfd, pathname, &path, flags, (mask & ALL_FSNOTIFY_EVENTS), obj_type); if (ret) - goto fput_and_out; + return ret; if (mark_cmd == FAN_MARK_ADD) { ret = fanotify_events_supported(group, &path, mask, flags); @@ -1907,21 +1983,55 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, fsid = &__fsid; } - /* inode held in place by reference to path; group by fget on fd */ - if (mark_type == FAN_MARK_INODE) + /* + * In addition to being capable in the user ns where group was created, + * the user also needs to be capable in the user ns associated with + * the filesystem or in the user ns associated with the mntns + * (when marking mntns). + */ + if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { inode = path.dentry->d_inode; - else - mnt = path.mnt; + obj = inode; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + user_ns = path.mnt->mnt_sb->s_user_ns; + obj = path.mnt; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { + user_ns = path.mnt->mnt_sb->s_user_ns; + obj = path.mnt->mnt_sb; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { + mntns = mnt_ns_from_dentry(path.dentry); + user_ns = mntns->user_ns; + obj = mntns; + } - ret = mnt ? -EINVAL : -EISDIR; - /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ - if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE && - (mnt || S_ISDIR(inode->i_mode)) && - !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) + ret = -EPERM; + if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN)) goto path_put_and_out; + ret = -EINVAL; + if (!obj) + goto path_put_and_out; + + /* + * If some other task has this inode open for write we should not add + * an ignore mask, unless that ignore mask is supposed to survive + * modification changes anyway. + */ + if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && + !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { + ret = !inode ? -EINVAL : -EISDIR; + /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ + if (ignore == FAN_MARK_IGNORE && + (!inode || S_ISDIR(inode->i_mode))) + goto path_put_and_out; + + ret = 0; + if (inode && inode_is_open_for_write(inode)) + goto path_put_and_out; + } + /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ - if (mnt || !S_ISDIR(inode->i_mode)) { + if (!inode || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; umask = FAN_EVENT_ON_CHILD; /* @@ -1936,26 +2046,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, /* create/update an inode mark */ switch (mark_cmd) { case FAN_MARK_ADD: - if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_add_vfsmount_mark(group, mnt, mask, - flags, fsid); - else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, - flags, fsid); - else - ret = fanotify_add_inode_mark(group, inode, mask, - flags, fsid); + ret = fanotify_add_mark(group, obj, obj_type, mask, flags, + fsid); break; case FAN_MARK_REMOVE: - if (mark_type == FAN_MARK_MOUNT) - ret = fanotify_remove_vfsmount_mark(group, mnt, mask, - flags, umask); - else if (mark_type == FAN_MARK_FILESYSTEM) - ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, - flags, umask); - else - ret = fanotify_remove_inode_mark(group, inode, mask, - flags, umask); + ret = fanotify_remove_mark(group, obj, obj_type, mask, flags, + umask); break; default: ret = -EINVAL; @@ -1963,8 +2059,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, path_put_and_out: path_put(&path); -fput_and_out: - fdput(f); return ret; } @@ -2011,7 +2105,7 @@ static int __init fanotify_user_setup(void) FANOTIFY_DEFAULT_MAX_USER_MARKS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); fanotify_mark_cache = KMEM_CACHE(fanotify_mark, @@ -2024,6 +2118,7 @@ static int __init fanotify_user_setup(void) fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } + fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC); fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 5c430736ec12..1161eabf11ee 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -41,29 +41,23 @@ static void show_fdinfo(struct seq_file *m, struct file *f, #if defined(CONFIG_EXPORTFS) static void show_mark_fhandle(struct seq_file *m, struct inode *inode) { - struct { - struct file_handle handle; - u8 pad[MAX_HANDLE_SZ]; - } f; + DEFINE_FLEX(struct file_handle, f, f_handle, handle_bytes, MAX_HANDLE_SZ); int size, ret, i; - f.handle.handle_bytes = sizeof(f.pad); - size = f.handle.handle_bytes >> 2; + size = f->handle_bytes >> 2; - ret = exportfs_encode_fid(inode, (struct fid *)f.handle.f_handle, &size); - if ((ret == FILEID_INVALID) || (ret < 0)) { - WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); + if ((ret == FILEID_INVALID) || (ret < 0)) return; - } - f.handle.handle_type = ret; - f.handle.handle_bytes = size * sizeof(u32); + f->handle_type = ret; + f->handle_bytes = size * sizeof(u32); seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:", - f.handle.handle_bytes, f.handle.handle_type); + f->handle_bytes, f->handle_type); - for (i = 0; i < f.handle.handle_bytes; i++) - seq_printf(m, "%02x", (int)f.handle.f_handle[i]); + for (i = 0; i < f->handle_bytes; i++) + seq_printf(m, "%02x", (int)f->f_handle[i]); } #else static void show_mark_fhandle(struct seq_file *m, struct inode *inode) @@ -127,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", sb->s_dev, mflags, mark->mask, mark->ignore_mask); + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) { + struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector); + + seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n", + mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask); } } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2fc105a72a8f..e2b4f17a48bb 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } +void __fsnotify_mntns_delete(struct mnt_namespace *mntns) +{ + fsnotify_clear_marks_by_mntns(mntns); +} + /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. * @sb: superblock being unmounted. @@ -89,11 +94,25 @@ static void fsnotify_unmount_inodes(struct super_block *sb) void fsnotify_sb_delete(struct super_block *sb) { + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + /* Were any marks ever added to any object on this sb? */ + if (!sbinfo) + return; + fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ - wait_var_event(&sb->s_fsnotify_connectors, - !atomic_long_read(&sb->s_fsnotify_connectors)); + wait_var_event(fsnotify_sb_watched_objects(sb), + !atomic_long_read(fsnotify_sb_watched_objects(sb))); + WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT)); + WARN_ON(fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_PRE_CONTENT)); +} + +void fsnotify_sb_free(struct super_block *sb) +{ + kfree(sb->s_fsnotify_info); } /* @@ -103,17 +122,13 @@ void fsnotify_sb_delete(struct super_block *sb) * parent cares. Thus when an event happens on a child it can quickly tell * if there is a need to find a parent and send the event to the parent. */ -void __fsnotify_update_child_dentry_flags(struct inode *inode) +void fsnotify_set_children_dentry_flags(struct inode *inode) { struct dentry *alias; - int watched; if (!S_ISDIR(inode->i_mode)) return; - /* determine if the children should tell inode about their events */ - watched = fsnotify_inode_watches_children(inode); - spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ @@ -129,10 +144,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) continue; spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); - if (watched) - child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; - else - child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; + child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } spin_unlock(&alias->d_lock); @@ -140,6 +152,24 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_unlock(&inode->i_lock); } +/* + * Lazily clear false positive PARENT_WATCHED flag for child whose parent had + * stopped watching children. + */ +static void fsnotify_clear_child_dentry_flag(struct inode *pinode, + struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + /* + * d_lock is a sufficient barrier to prevent observing a non-watched + * parent state from before the fsnotify_set_children_dentry_flags() + * or fsnotify_update_flags() call that had set PARENT_WATCHED. + */ + if (!fsnotify_inode_watches_children(pinode)) + dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; + spin_unlock(&dentry->d_lock); +} + /* Are inode/sb/mount interested in parent and name info with this event? */ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, __u32 mask) @@ -158,24 +188,44 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); /* Did either inode/sb/mount subscribe for events with parent/name? */ - marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask); - marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask); + marks_mask |= fsnotify_parent_needed_mask( + READ_ONCE(inode->i_fsnotify_mask)); + marks_mask |= fsnotify_parent_needed_mask( + READ_ONCE(inode->i_sb->s_fsnotify_mask)); marks_mask |= fsnotify_parent_needed_mask(mnt_mask); /* Did they subscribe for this event with parent/name info? */ return mask & marks_mask; } -/* Are there any inode/mount/sb objects that are interested in this event? */ +/* Are there any inode/mount/sb objects that watch for these events? */ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, __u32 mask) { - __u32 marks_mask = inode->i_fsnotify_mask | mnt_mask | - inode->i_sb->s_fsnotify_mask; + __u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask | + READ_ONCE(inode->i_sb->s_fsnotify_mask); return mask & marks_mask & ALL_FSNOTIFY_EVENTS; } +/* Report pre-content event with optional range info */ +int fsnotify_pre_content(const struct path *path, const loff_t *ppos, + size_t count) +{ + struct file_range range; + + /* Report page aligned range only when pos is known */ + if (!ppos) + return fsnotify_path(path, FS_PRE_ACCESS); + + range.path = path; + range.pos = PAGE_ALIGN_DOWN(*ppos); + range.count = PAGE_ALIGN(*ppos + count) - range.pos; + + return fsnotify_parent(path->dentry, FS_PRE_ACCESS, &range, + FSNOTIFY_EVENT_FILE_RANGE); +} + /* * Notify this dentry's parent about a child's events with child name info * if parent is watching or if inode/sb/mount are interested in events with @@ -188,7 +238,8 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { const struct path *path = fsnotify_data_path(data, data_type); - __u32 mnt_mask = path ? real_mount(path->mnt)->mnt_fsnotify_mask : 0; + __u32 mnt_mask = path ? + READ_ONCE(real_mount(path->mnt)->mnt_fsnotify_mask) : 0; struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; @@ -214,7 +265,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, p_inode = parent->d_inode; p_mask = fsnotify_inode_watches_children(p_inode); if (unlikely(parent_watched && !p_mask)) - __fsnotify_update_child_dentry_flags(p_inode); + fsnotify_clear_child_dentry_flag(p_inode, dentry); /* * Include parent/name in notification either if some notification @@ -305,16 +356,19 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, if (!inode_mark) return 0; - if (mask & FS_EVENT_ON_CHILD) { - /* - * Some events can be sent on both parent dir and child marks - * (e.g. FS_ATTRIB). If both parent dir and child are - * watching, report the event once to parent dir with name (if - * interested) and once to child without name (if interested). - * The child watcher is expecting an event without a file name - * and without the FS_EVENT_ON_CHILD flag. - */ - mask &= ~FS_EVENT_ON_CHILD; + /* + * Some events can be sent on both parent dir and child marks (e.g. + * FS_ATTRIB). If both parent dir and child are watching, report the + * event once to parent dir with name (if interested) and once to child + * without name (if interested). + * + * In any case regardless whether the parent is watching or not, the + * child watcher is expecting an event without the FS_EVENT_ON_CHILD + * flag. The file name is expected if and only if this is a directory + * event. + */ + mask &= ~FS_EVENT_ON_CHILD; + if (!(mask & ALL_FSNOTIFY_DIRENT_EVENTS)) { dir = NULL; name = NULL; } @@ -371,7 +425,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type, file_name, cookie, iter_info); } -static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) +static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp) { struct fsnotify_mark_connector *conn; struct hlist_node *node = NULL; @@ -489,13 +543,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); + struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL; struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; struct dentry *moved; int inode2_type; int ret = 0; - __u32 test_mask, marks_mask; + __u32 test_mask, marks_mask = 0; if (path) mnt = real_mount(path->mnt); @@ -525,20 +581,23 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (!sb->s_fsnotify_marks && + if ((!sbinfo || !sbinfo->sb_marks) && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && - (!inode2 || !inode2->i_fsnotify_marks)) + (!inode2 || !inode2->i_fsnotify_marks) && + (!mnt_data || !mnt_data->ns->n_fsnotify_marks)) return 0; - marks_mask = sb->s_fsnotify_mask; + if (sb) + marks_mask |= READ_ONCE(sb->s_fsnotify_mask); if (mnt) - marks_mask |= mnt->mnt_fsnotify_mask; + marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask); if (inode) - marks_mask |= inode->i_fsnotify_mask; + marks_mask |= READ_ONCE(inode->i_fsnotify_mask); if (inode2) - marks_mask |= inode2->i_fsnotify_mask; - + marks_mask |= READ_ONCE(inode2->i_fsnotify_mask); + if (mnt_data) + marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask); /* * If this is a modify event we may need to clear some ignore masks. @@ -552,8 +611,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = - fsnotify_first_mark(&sb->s_fsnotify_marks); + if (sbinfo) { + iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = + fsnotify_first_mark(&sbinfo->sb_marks); + } if (mnt) { iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); @@ -566,6 +627,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.marks[inode2_type] = fsnotify_first_mark(&inode2->i_fsnotify_marks); } + if (mnt_data) { + iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] = + fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks); + } /* * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark @@ -589,11 +654,98 @@ out: } EXPORT_SYMBOL_GPL(fsnotify); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +/* + * At open time we check fsnotify_sb_has_priority_watchers() and set the + * FMODE_NONOTIFY_ mode bits accordignly. + * Later, fsnotify permission hooks do not check if there are permission event + * watches, but that there were permission event watches at open time. + */ +void file_set_fsnotify_mode_from_watchers(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry, *parent; + struct super_block *sb = dentry->d_sb; + __u32 mnt_mask, p_mask; + + /* Is it a file opened by fanotify? */ + if (FMODE_FSNOTIFY_NONE(file->f_mode)) + return; + + /* + * Permission events is a super set of pre-content events, so if there + * are no permission event watchers, there are also no pre-content event + * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit. + */ + if (likely(!fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_CONTENT))) { + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); + return; + } + + /* + * If there are permission event watchers but no pre-content event + * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + */ + if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || + likely(!fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_PRE_CONTENT))) { + file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); + return; + } + + /* + * OK, there are some pre-content watchers. Check if anybody is + * watching for pre-content events on *this* file. + */ + mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); + if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, + FSNOTIFY_PRE_CONTENT_EVENTS))) { + /* Enable pre-content events */ + file_set_fsnotify_mode(file, 0); + return; + } + + /* Is parent watching for pre-content events on this file? */ + if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { + parent = dget_parent(dentry); + p_mask = fsnotify_inode_watches_children(d_inode(parent)); + dput(parent); + if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) { + /* Enable pre-content events */ + file_set_fsnotify_mode(file, 0); + return; + } + } + /* Nobody watching for pre-content events from this file */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); +} +#endif + +void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) +{ + struct fsnotify_mnt data = { + .ns = ns, + .mnt_id = real_mount(mnt)->mnt_id_unique, + }; + + if (WARN_ON_ONCE(!ns)) + return; + + /* + * This is an optimization as well as making sure fsnotify_init() has + * been called. + */ + if (!ns->n_fsnotify_marks) + return; + + fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0); +} + static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index fde74eb333cc..5950c7a67f41 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -9,39 +9,64 @@ #include "../mount.h" +/* + * fsnotify_connp_t is what we embed in objects which connector can be attached + * to. + */ +typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; + static inline struct inode *fsnotify_conn_inode( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct inode, i_fsnotify_marks); + return conn->obj; } static inline struct mount *fsnotify_conn_mount( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct mount, mnt_fsnotify_marks); + return real_mount(conn->obj); } static inline struct super_block *fsnotify_conn_sb( struct fsnotify_mark_connector *conn) { - return container_of(conn->obj, struct super_block, s_fsnotify_marks); + return conn->obj; } -static inline struct super_block *fsnotify_connector_sb( +static inline struct mnt_namespace *fsnotify_conn_mntns( struct fsnotify_mark_connector *conn) { - switch (conn->type) { + return conn->obj; +} + +static inline struct super_block *fsnotify_object_sb(void *obj, + enum fsnotify_obj_type obj_type) +{ + switch (obj_type) { case FSNOTIFY_OBJ_TYPE_INODE: - return fsnotify_conn_inode(conn)->i_sb; + return ((struct inode *)obj)->i_sb; case FSNOTIFY_OBJ_TYPE_VFSMOUNT: - return fsnotify_conn_mount(conn)->mnt.mnt_sb; + return ((struct vfsmount *)obj)->mnt_sb; case FSNOTIFY_OBJ_TYPE_SB: - return fsnotify_conn_sb(conn); + return (struct super_block *)obj; default: return NULL; } } +static inline struct super_block *fsnotify_connector_sb( + struct fsnotify_mark_connector *conn) +{ + return fsnotify_object_sb(conn->obj, conn->type); +} + +static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb) +{ + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + return sbinfo ? &sbinfo->sb_marks : NULL; +} + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -67,14 +92,19 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) /* run the list of all marks associated with sb and destroy them */ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) { - fsnotify_destroy_marks(&sb->s_fsnotify_marks); + fsnotify_destroy_marks(fsnotify_sb_marks(sb)); +} + +static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns) +{ + fsnotify_destroy_marks(&mntns->n_fsnotify_marks); } /* * update the dentry->d_flags of all of inode's children to indicate if inode cares * about events that happen to its children. */ -extern void __fsnotify_update_child_dentry_flags(struct inode *inode); +extern void fsnotify_set_children_dentry_flags(struct inode *inode); extern struct kmem_cache *fsnotify_mark_connector_cachep; diff --git a/fs/notify/group.c b/fs/notify/group.c index 1de6631a3925..18446b7b0d49 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -115,7 +115,6 @@ static struct fsnotify_group *__fsnotify_alloc_group( const struct fsnotify_ops *ops, int flags, gfp_t gfp) { - static struct lock_class_key nofs_marks_lock; struct fsnotify_group *group; group = kzalloc(sizeof(struct fsnotify_group), gfp); @@ -136,16 +135,6 @@ static struct fsnotify_group *__fsnotify_alloc_group( group->ops = ops; group->flags = flags; - /* - * For most backends, eviction of inode with a mark is not expected, - * because marks hold a refcount on the inode against eviction. - * - * Use a different lockdep class for groups that support evictable - * inode marks, because with evictable marks, mark_mutex is NOT - * fs-reclaim safe - the mutex is taken when evicting inodes. - */ - if (flags & FSNOTIFY_GROUP_NOFS) - lockdep_set_class(&group->mark_mutex, &nofs_marks_lock); return group; } diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 993375f0db67..cd7d11b0eb08 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -121,7 +121,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, event->sync_cookie = cookie; event->name_len = len; if (len) - strcpy(event->name, name->name); + strscpy(event->name, name->name, event->name_len + 1); ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 85d8fdd55329..b372fb2c56bd 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -58,7 +58,7 @@ struct kmem_cache *inotify_inode_mark_cachep __ro_after_init; static long it_zero = 0; static long it_int_max = INT_MAX; -static struct ctl_table inotify_table[] = { +static const struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], @@ -544,7 +544,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, int create = (arg & IN_MASK_CREATE); int ret; - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); + fsn_mark = fsnotify_find_inode_mark(inode, group); if (!fsn_mark) return -ENOENT; else if (create) { @@ -569,7 +569,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* more bits in old than in new? */ int dropped = (old_mask & ~new_mask); /* more bits in this fsn_mark than the inode's mask? */ - int do_inode = (new_mask & ~inode->i_fsnotify_mask); + int do_inode = (new_mask & ~READ_ONCE(inode->i_fsnotify_mask)); /* update the inode with this new fsn_mark */ if (dropped || do_inode) @@ -732,7 +732,6 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; - struct fd f; int ret; unsigned flags = 0; @@ -752,21 +751,17 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (unlikely(!(mask & ALL_INOTIFY_BITS))) return -EINVAL; - f = fdget(fd); - if (unlikely(!f.file)) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EBADF; /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ - if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) { - ret = -EINVAL; - goto fput_and_out; - } + if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) + return -EINVAL; /* verify that this is indeed an inotify instance */ - if (unlikely(f.file->f_op != &inotify_fops)) { - ret = -EINVAL; - goto fput_and_out; - } + if (unlikely(fd_file(f)->f_op != &inotify_fops)) + return -EINVAL; if (!(mask & IN_DONT_FOLLOW)) flags |= LOOKUP_FOLLOW; @@ -776,17 +771,15 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, ret = inotify_find_inode(pathname, &path, flags, (mask & IN_ALL_EVENTS)); if (ret) - goto fput_and_out; + return ret; /* inode held in place by reference to path; group by fget on fd */ inode = path.dentry->d_inode; - group = f.file->private_data; + group = fd_file(f)->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); path_put(&path); -fput_and_out: - fdput(f); return ret; } @@ -794,33 +787,26 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { struct fsnotify_group *group; struct inotify_inode_mark *i_mark; - struct fd f; - int ret = -EINVAL; + CLASS(fd, f)(fd); - f = fdget(fd); - if (unlikely(!f.file)) + if (fd_empty(f)) return -EBADF; /* verify that this is indeed an inotify instance */ - if (unlikely(f.file->f_op != &inotify_fops)) - goto out; + if (unlikely(fd_file(f)->f_op != &inotify_fops)) + return -EINVAL; - group = f.file->private_data; + group = fd_file(f)->private_data; i_mark = inotify_idr_find(group, wd); if (unlikely(!i_mark)) - goto out; - - ret = 0; + return -EINVAL; fsnotify_destroy_mark(&i_mark->fsn_mark, group); /* match ref taken by inotify_idr_find */ fsnotify_put_mark(&i_mark->fsn_mark); - -out: - fdput(f); - return ret; + return 0; } /* diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d6944ff86ffa..798340db69d7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -97,6 +97,23 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) refcount_inc(&mark->refcnt); } +static fsnotify_connp_t *fsnotify_object_connp(void *obj, + enum fsnotify_obj_type obj_type) +{ + switch (obj_type) { + case FSNOTIFY_OBJ_TYPE_INODE: + return &((struct inode *)obj)->i_fsnotify_marks; + case FSNOTIFY_OBJ_TYPE_VFSMOUNT: + return &real_mount(obj)->mnt_fsnotify_marks; + case FSNOTIFY_OBJ_TYPE_SB: + return fsnotify_sb_marks(obj); + case FSNOTIFY_OBJ_TYPE_MNTNS: + return &((struct mnt_namespace *)obj)->n_fsnotify_marks; + default: + return NULL; + } +} + static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) { if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) @@ -105,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) return &fsnotify_conn_sb(conn)->s_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) + return &fsnotify_conn_mntns(conn)->n_fsnotify_mask; return NULL; } @@ -113,13 +132,78 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) return 0; - return *fsnotify_conn_mask_p(conn); + return READ_ONCE(*fsnotify_conn_mask_p(conn)); +} + +static void fsnotify_get_sb_watched_objects(struct super_block *sb) +{ + atomic_long_inc(fsnotify_sb_watched_objects(sb)); +} + +static void fsnotify_put_sb_watched_objects(struct super_block *sb) +{ + atomic_long_t *watched_objects = fsnotify_sb_watched_objects(sb); + + /* the superblock can go away after this decrement */ + if (atomic_long_dec_and_test(watched_objects)) + wake_up_var(watched_objects); } static void fsnotify_get_inode_ref(struct inode *inode) { ihold(inode); - atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); + fsnotify_get_sb_watched_objects(inode->i_sb); +} + +static void fsnotify_put_inode_ref(struct inode *inode) +{ + /* read ->i_sb before the inode can go away */ + struct super_block *sb = inode->i_sb; + + iput(inode); + fsnotify_put_sb_watched_objects(sb); +} + +/* + * Grab or drop watched objects reference depending on whether the connector + * is attached and has any marks attached. + */ +static void fsnotify_update_sb_watchers(struct super_block *sb, + struct fsnotify_mark_connector *conn) +{ + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED; + struct fsnotify_mark *first_mark = NULL; + unsigned int highest_prio = 0; + + if (conn->obj) + first_mark = hlist_entry_safe(conn->list.first, + struct fsnotify_mark, obj_list); + if (first_mark) + highest_prio = first_mark->group->priority; + if (WARN_ON(highest_prio >= __FSNOTIFY_PRIO_NUM)) + highest_prio = 0; + + /* + * If the highest priority of group watching this object is prio, + * then watched object has a reference on counters [0..prio]. + * Update priority >= 1 watched objects counters. + */ + for (unsigned int p = conn->prio + 1; p <= highest_prio; p++) + atomic_long_inc(&sbinfo->watched_objects[p]); + for (unsigned int p = conn->prio; p > highest_prio; p--) + atomic_long_dec(&sbinfo->watched_objects[p]); + conn->prio = highest_prio; + + /* Update priority >= 0 (a.k.a total) watched objects counter */ + BUILD_BUG_ON(FSNOTIFY_PRIO_NORMAL != 0); + if (first_mark && !is_watched) { + conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED; + fsnotify_get_sb_watched_objects(sb); + } else if (!first_mark && is_watched) { + conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED; + fsnotify_put_sb_watched_objects(sb); + } } /* @@ -171,11 +255,33 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) want_iref = true; } - *fsnotify_conn_mask_p(conn) = new_mask; + /* + * We use WRITE_ONCE() to prevent silly compiler optimizations from + * confusing readers not holding conn->lock with partial updates. + */ + WRITE_ONCE(*fsnotify_conn_mask_p(conn), new_mask); return fsnotify_update_iref(conn, want_iref); } +static bool fsnotify_conn_watches_children( + struct fsnotify_mark_connector *conn) +{ + if (conn->type != FSNOTIFY_OBJ_TYPE_INODE) + return false; + + return fsnotify_inode_watches_children(fsnotify_conn_inode(conn)); +} + +static void fsnotify_conn_set_children_dentry_flags( + struct fsnotify_mark_connector *conn) +{ + if (conn->type != FSNOTIFY_OBJ_TYPE_INODE) + return; + + fsnotify_set_children_dentry_flags(fsnotify_conn_inode(conn)); +} + /* * Calculate mask of events for a list of marks. The caller must make sure * connector and connector->obj cannot disappear under us. Callers achieve @@ -184,15 +290,23 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) */ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { + bool update_children; + if (!conn) return; spin_lock(&conn->lock); + update_children = !fsnotify_conn_watches_children(conn); __fsnotify_recalc_mask(conn); + update_children &= fsnotify_conn_watches_children(conn); spin_unlock(&conn->lock); - if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) - __fsnotify_update_child_dentry_flags( - fsnotify_conn_inode(conn)); + /* + * Set children's PARENT_WATCHED flags only if parent started watching. + * When parent stops watching, we clear false positive PARENT_WATCHED + * flags lazily in __fsnotify_parent(). + */ + if (update_children) + fsnotify_conn_set_children_dentry_flags(conn); } /* Free all connectors queued for freeing once SRCU period ends */ @@ -213,35 +327,12 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static void fsnotify_put_inode_ref(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - - iput(inode); - if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) - wake_up_var(&sb->s_fsnotify_connectors); -} - -static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb) - atomic_long_inc(&sb->s_fsnotify_connectors); -} - -static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) - wake_up_var(&sb->s_fsnotify_connectors); -} - static void *fsnotify_detach_connector_from_object( struct fsnotify_mark_connector *conn, unsigned int *type) { + fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type); + struct super_block *sb = fsnotify_connector_sb(conn); struct inode *inode = NULL; *type = conn->type; @@ -259,12 +350,15 @@ static void *fsnotify_detach_connector_from_object( fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) { + fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0; } - fsnotify_put_sb_connectors(conn); - rcu_assign_pointer(*(conn->obj), NULL); + rcu_assign_pointer(*connp, NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; + if (sb) + fsnotify_update_sb_watchers(sb, conn); return inode; } @@ -316,6 +410,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { + struct super_block *sb = fsnotify_connector_sb(conn); + + /* Update watched objects after detaching mark */ + if (sb) + fsnotify_update_sb_watchers(sb, conn); objp = __fsnotify_recalc_mask(conn); type = conn->type; } @@ -536,8 +635,28 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } +static int fsnotify_attach_info_to_sb(struct super_block *sb) +{ + struct fsnotify_sb_info *sbinfo; + + /* sb info is freed on fsnotify_sb_delete() */ + sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL); + if (!sbinfo) + return -ENOMEM; + + /* + * cmpxchg() provides the barrier so that callers of fsnotify_sb_info() + * will observe an initialized structure + */ + if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) { + /* Someone else created sbinfo for us */ + kfree(sbinfo); + } + return 0; +} + static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int obj_type) + void *obj, unsigned int obj_type) { struct fsnotify_mark_connector *conn; @@ -547,10 +666,9 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); conn->flags = 0; + conn->prio = 0; conn->type = obj_type; - conn->obj = connp; - conn->flags = 0; - fsnotify_get_sb_connectors(conn); + conn->obj = obj; /* * cmpxchg() provides the barrier so that readers of *connp can see @@ -558,10 +676,8 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ - fsnotify_put_sb_connectors(conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn); } - return 0; } @@ -598,24 +714,36 @@ out: * to which group and for which inodes. These marks are ordered according to * priority, highest number first, and then by the group's location in memory. */ -static int fsnotify_add_mark_list(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, +static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, unsigned int obj_type, int add_flags) { + struct super_block *sb = fsnotify_object_sb(obj, obj_type); struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; + fsnotify_connp_t *connp; int cmp; int err = 0; if (WARN_ON(!fsnotify_valid_obj_type(obj_type))) return -EINVAL; + /* + * Attach the sb info before attaching a connector to any object on sb. + * The sb info will remain attached as long as sb lives. + */ + if (sb && !fsnotify_sb_info(sb)) { + err = fsnotify_attach_info_to_sb(sb); + if (err) + return err; + } + + connp = fsnotify_object_connp(obj, obj_type); restart: spin_lock(&mark->lock); conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, obj_type); + err = fsnotify_attach_connector_to_object(connp, obj, obj_type); if (err) return err; goto restart; @@ -649,6 +777,8 @@ restart: /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: + if (sb) + fsnotify_update_sb_watchers(sb, conn); /* * Since connector is attached to object using cmpxchg() we are * guaranteed that connector initialization is fully visible by anyone @@ -667,7 +797,7 @@ out_err: * event types should be delivered to which group. */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int obj_type, + void *obj, unsigned int obj_type, int add_flags) { struct fsnotify_group *group = mark->group; @@ -688,7 +818,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags); + ret = fsnotify_add_mark_list(mark, obj, obj_type, add_flags); if (ret) goto err; @@ -706,14 +836,14 @@ err: return ret; } -int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, +int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj, unsigned int obj_type, int add_flags) { int ret; struct fsnotify_group *group = mark->group; fsnotify_group_lock(group); - ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags); + ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags); fsnotify_group_unlock(group); return ret; } @@ -723,12 +853,16 @@ EXPORT_SYMBOL_GPL(fsnotify_add_mark); * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ -struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, +struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type, struct fsnotify_group *group) { + fsnotify_connp_t *connp = fsnotify_object_connp(obj, obj_type); struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; + if (!connp) + return NULL; + conn = fsnotify_grab_connector(connp); if (!conn) return NULL; |