diff options
Diffstat (limited to 'fs/notify')
-rw-r--r-- | fs/notify/dnotify/dnotify.c | 8 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 11 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.h | 9 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 57 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 87 |
5 files changed, 100 insertions, 72 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index c4cdaf5fa7ed..9fb73bafd41d 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -308,6 +308,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } + error = file_f_owner_allocate(filp); + if (error) + goto out_err; + /* new fsnotify mark, we expect most fcntl calls to add a new mark */ new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL); if (!new_dn_mark) { @@ -315,10 +319,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } - error = file_f_owner_allocate(filp); - if (error) - goto out_err; - /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; fsnotify_init_mark(new_fsn_mark, dnotify_group); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6d386080faf2..bfe884d624e7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -415,7 +415,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, { int dwords, type = 0; char *ext_buf = NULL; - void *buf = fh->buf; + void *buf = fh + 1; int err; fh->type = FILEID_ROOT; @@ -454,7 +454,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, dwords = fh_len >> 2; type = exportfs_encode_fid(inode, buf, &dwords); err = -EINVAL; - if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2) + /* + * Unlike file_handle, type and len of struct fanotify_fh are u8. + * Traditionally, filesystems return handle_type < 0xff, but there + * is no enforcement for that in vfs. 
+ */ + BUILD_BUG_ON(MAX_HANDLE_SZ > 0xff || FILEID_INVALID > 0xff); + if (type <= 0 || type >= FILEID_INVALID || fh_len != dwords << 2) goto out_err; fh->type = type; @@ -1009,6 +1015,7 @@ finish: static void fanotify_free_group_priv(struct fsnotify_group *group) { + put_user_ns(group->user_ns); kfree(group->fanotify_data.merge_hash); if (group->fanotify_data.ucounts) dec_ucount(group->fanotify_data.ucounts, diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index b44e70e44be6..b78308975082 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -25,7 +25,7 @@ enum { * stored in either the first or last 2 dwords. */ #define FANOTIFY_INLINE_FH_LEN (3 << 2) -#define FANOTIFY_FH_HDR_LEN offsetof(struct fanotify_fh, buf) +#define FANOTIFY_FH_HDR_LEN sizeof(struct fanotify_fh) /* Fixed size struct for file handle */ struct fanotify_fh { @@ -34,7 +34,6 @@ struct fanotify_fh { #define FANOTIFY_FH_FLAG_EXT_BUF 1 u8 flags; u8 pad; - unsigned char buf[]; } __aligned(4); /* Variable size struct for dir file handle + child file handle + name */ @@ -92,7 +91,7 @@ static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh) BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4); BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) > FANOTIFY_INLINE_FH_LEN); - return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *)); + return (char **)ALIGN((unsigned long)(fh + 1), __alignof__(char *)); } static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) @@ -102,7 +101,7 @@ static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) static inline void *fanotify_fh_buf(struct fanotify_fh *fh) { - return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf; + return fanotify_fh_has_ext_buf(fh) ? 
fanotify_fh_ext_buf(fh) : fh + 1; } static inline int fanotify_info_dir_fh_len(struct fanotify_info *info) @@ -278,7 +277,7 @@ static inline void fanotify_init_event(struct fanotify_event *event, #define FANOTIFY_INLINE_FH(name, size) \ struct { \ struct fanotify_fh name; \ - /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \ + /* Space for filehandle - access with fanotify_fh_buf() */ \ unsigned char _inline_fh_buf[size]; \ } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f2d840ae4ded..b192ee068a7a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1334,6 +1334,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, * A group with FAN_UNLIMITED_MARKS does not contribute to mark count * in the limited groups account. */ + BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS)); if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) return ERR_PTR(-ENOSPC); @@ -1498,6 +1499,7 @@ static struct hlist_head *fanotify_alloc_merge_hash(void) /* fanotify syscalls */ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { + struct user_namespace *user_ns = current_user_ns(); struct fsnotify_group *group; int f_flags, fd; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; @@ -1512,10 +1514,11 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) /* * An unprivileged user can setup an fanotify group with * limited functionality - an unprivileged group is limited to - * notification events with file handles and it cannot use - * unlimited queue/marks. + * notification events with file handles or mount ids and it + * cannot use unlimited queue/marks. 
*/ - if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) + if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || + !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT))) return -EPERM; /* @@ -1594,8 +1597,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } /* Enforce groups limits per user in all containing user ns */ - group->fanotify_data.ucounts = inc_ucount(current_user_ns(), - current_euid(), + group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(), UCOUNT_FANOTIFY_GROUPS); if (!group->fanotify_data.ucounts) { fd = -EMFILE; @@ -1604,6 +1606,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.flags = flags | internal_flags; group->memcg = get_mem_cgroup_from_mm(current->mm); + group->user_ns = get_user_ns(user_ns); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); if (!group->fanotify_data.merge_hash) { @@ -1637,21 +1640,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_destroy_group; } + BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE)); if (flags & FAN_UNLIMITED_QUEUE) { - fd = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out_destroy_group; group->max_events = UINT_MAX; } else { group->max_events = fanotify_max_queued_events; } - if (flags & FAN_UNLIMITED_MARKS) { - fd = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out_destroy_group; - } - if (flags & FAN_ENABLE_AUDIT) { fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) @@ -1811,6 +1806,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct path path; struct fan_fsid __fsid, *fsid = NULL; + struct user_namespace *user_ns = NULL; + struct mnt_namespace *mntns; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; @@ -1904,12 +1901,10 @@ static int do_fanotify_mark(int 
fanotify_fd, unsigned int flags, __u64 mask, } /* - * An unprivileged user is not allowed to setup mount nor filesystem - * marks. This also includes setting up such marks by a group that - * was initialized by an unprivileged user. + * A user is allowed to setup sb/mount/mntns marks only if it is + * capable in the user ns where the group was created. */ - if ((!capable(CAP_SYS_ADMIN) || - FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && + if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) && mark_type != FAN_MARK_INODE) return -EPERM; @@ -1961,12 +1956,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { - if (mark_type == FAN_MARK_MOUNT) - fsnotify_clear_vfsmount_marks_by_group(group); - else if (mark_type == FAN_MARK_FILESYSTEM) - fsnotify_clear_sb_marks_by_group(group); - else - fsnotify_clear_inode_marks_by_group(group); + fsnotify_clear_marks_by_group(group, obj_type); return 0; } @@ -1993,18 +1983,31 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, fsid = &__fsid; } - /* inode held in place by reference to path; group by fget on fd */ + /* + * In addition to being capable in the user ns where group was created, + * the user also needs to be capable in the user ns associated with + * the filesystem or in the user ns associated with the mntns + * (when marking mntns). 
+ */ if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { inode = path.dentry->d_inode; obj = inode; } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt; } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { + user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt->mnt_sb; } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { - obj = mnt_ns_from_dentry(path.dentry); + mntns = mnt_ns_from_dentry(path.dentry); + user_ns = mntns->user_ns; + obj = mntns; } + ret = -EPERM; + if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN)) + goto path_put_and_out; + ret = -EINVAL; if (!obj) goto path_put_and_out; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index e2b4f17a48bb..079b868552c2 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -199,8 +199,8 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, } /* Are there any inode/mount/sb objects that watch for these events? */ -static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, - __u32 mask) +static inline __u32 fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, + __u32 mask) { __u32 marks_mask = READ_ONCE(inode->i_fsnotify_mask) | mnt_mask | READ_ONCE(inode->i_sb->s_fsnotify_mask); @@ -656,20 +656,20 @@ EXPORT_SYMBOL_GPL(fsnotify); #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* - * At open time we check fsnotify_sb_has_priority_watchers() and set the - * FMODE_NONOTIFY_ mode bits accordignly. + * At open time we check fsnotify_sb_has_priority_watchers(), call the open perm + * hook and set the FMODE_NONOTIFY_ mode bits accordingly. * Later, fsnotify permission hooks do not check if there are permission event * watches, but that there were permission event watches at open time. 
*/ -void file_set_fsnotify_mode_from_watchers(struct file *file) +int fsnotify_open_perm_and_set_mode(struct file *file) { struct dentry *dentry = file->f_path.dentry, *parent; struct super_block *sb = dentry->d_sb; - __u32 mnt_mask, p_mask; + __u32 mnt_mask, p_mask = 0; /* Is it a file opened by fanotify? */ if (FMODE_FSNOTIFY_NONE(file->f_mode)) - return; + return 0; /* * Permission events is a super set of pre-content events, so if there @@ -679,45 +679,64 @@ void file_set_fsnotify_mode_from_watchers(struct file *file) if (likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT))) { file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); - return; + return 0; } /* - * If there are permission event watchers but no pre-content event - * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + * OK, there are some permission event watchers. Check if anybody is + * watching for permission events on *this* file. */ - if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || - likely(!fsnotify_sb_has_priority_watchers(sb, - FSNOTIFY_PRIO_PRE_CONTENT))) { - file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); - return; + mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); + p_mask = fsnotify_object_watched(d_inode(dentry), mnt_mask, + ALL_FSNOTIFY_PERM_EVENTS); + if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { + parent = dget_parent(dentry); + p_mask |= fsnotify_inode_watches_children(d_inode(parent)); + dput(parent); } /* - * OK, there are some pre-content watchers. Check if anybody is - * watching for pre-content events on *this* file. + * Legacy FAN_ACCESS_PERM events have very high performance overhead, + * so unlikely to be used in the wild. If they are used there will be + * no optimizations at all. 
*/ - mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); - if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, - FSNOTIFY_PRE_CONTENT_EVENTS))) { - /* Enable pre-content events */ + if (unlikely(p_mask & FS_ACCESS_PERM)) { + /* Enable all permission and pre-content events */ file_set_fsnotify_mode(file, 0); - return; + goto open_perm; } - /* Is parent watching for pre-content events on this file? */ - if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { - parent = dget_parent(dentry); - p_mask = fsnotify_inode_watches_children(d_inode(parent)); - dput(parent); - if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) { - /* Enable pre-content events */ - file_set_fsnotify_mode(file, 0); - return; - } + /* + * Pre-content events are only supported on regular files. + * If there are pre-content event watchers and no permission access + * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + * That is the common case with HSM service. + */ + if (d_is_reg(dentry) && (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)) { + file_set_fsnotify_mode(file, FMODE_NONOTIFY | + FMODE_NONOTIFY_PERM); + goto open_perm; } - /* Nobody watching for pre-content events from this file */ - file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); + + /* Nobody watching permission and pre-content events on this file */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); + +open_perm: + /* + * Send open perm events depending on object masks and regardless of + * FMODE_NONOTIFY_PERM. + */ + if (file->f_flags & __FMODE_EXEC && p_mask & FS_OPEN_EXEC_PERM) { + int ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); + + if (ret) + return ret; + } + + if (p_mask & FS_OPEN_PERM) + return fsnotify_path(&file->f_path, FS_OPEN_PERM); + + return 0; } #endif |