diff options
Diffstat (limited to 'fs/open.c')
-rw-r--r-- | fs/open.c | 325 |
1 files changed, 172 insertions, 153 deletions
diff --git a/fs/open.c b/fs/open.c index ee8460c83c77..7828234a7caa 100644 --- a/fs/open.c +++ b/fs/open.c @@ -60,18 +60,21 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry, if (ret) newattrs.ia_valid |= ret | ATTR_FORCE; - inode_lock(dentry->d_inode); + ret = inode_lock_killable(dentry->d_inode); + if (ret) + return ret; + /* Note any delegations or leases have already been broken: */ ret = notify_change(idmap, dentry, &newattrs, NULL); inode_unlock(dentry->d_inode); return ret; } -long vfs_truncate(const struct path *path, loff_t length) +int vfs_truncate(const struct path *path, loff_t length) { struct mnt_idmap *idmap; struct inode *inode; - long error; + int error; inode = path->dentry->d_inode; @@ -81,14 +84,18 @@ long vfs_truncate(const struct path *path, loff_t length) if (!S_ISREG(inode->i_mode)) return -EINVAL; - error = mnt_want_write(path->mnt); - if (error) - goto out; - idmap = mnt_idmap(path->mnt); error = inode_permission(idmap, inode, MAY_WRITE); if (error) - goto mnt_drop_write_and_out; + return error; + + error = fsnotify_truncate_perm(path, length); + if (error) + return error; + + error = mnt_want_write(path->mnt); + if (error) + return error; error = -EPERM; if (IS_APPEND(inode)) @@ -114,12 +121,12 @@ put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: mnt_drop_write(path->mnt); -out: + return error; } EXPORT_SYMBOL_GPL(vfs_truncate); -long do_sys_truncate(const char __user *pathname, loff_t length) +int do_sys_truncate(const char __user *pathname, loff_t length) { unsigned int lookup_flags = LOOKUP_FOLLOW; struct path path; @@ -153,7 +160,7 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length } #endif -long do_ftruncate(struct file *file, loff_t length, int small) +int do_ftruncate(struct file *file, loff_t length, int small) { struct inode *inode; struct dentry *dentry; @@ -175,40 +182,41 @@ long do_ftruncate(struct file *file, loff_t length, int small) /* Check IS_APPEND on real upper inode */ if (IS_APPEND(file_inode(file))) return -EPERM; - sb_start_write(inode->i_sb); + error = security_file_truncate(file); - if (!error) - error = do_truncate(file_mnt_idmap(file), dentry, length, - ATTR_MTIME | ATTR_CTIME, file); + if (error) + return error; + + error = fsnotify_truncate_perm(&file->f_path, length); + if (error) + return error; + + sb_start_write(inode->i_sb); + error = do_truncate(file_mnt_idmap(file), dentry, length, + ATTR_MTIME | ATTR_CTIME, file); sb_end_write(inode->i_sb); return error; } -long do_sys_ftruncate(unsigned int fd, loff_t length, int small) +int do_sys_ftruncate(unsigned int fd, loff_t length, int small) { - struct fd f; - int error; - if (length < 0) return -EINVAL; - f = fdget(fd); - if (!f.file) + CLASS(fd, f)(fd); + if (fd_empty(f)) return -EBADF; - error = do_ftruncate(f.file, length, small); - - fdput(f); - return error; + return do_ftruncate(fd_file(f), length, small); } -SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) +SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length) { return do_sys_ftruncate(fd, length, 1); } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length) +COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length) { return do_sys_ftruncate(fd, length, 1); } @@ -246,45 +254,45 @@ COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); - long ret; + int ret; + loff_t sum; if (offset < 0 || len <= 0) return -EINVAL; - /* Return error if mode is not supported */ - if (mode & ~FALLOC_FL_SUPPORTED_MASK) + if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE)) return -EOPNOTSUPP; - /* Punch hole and zero range are mutually exclusive */ - if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) == - (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) - return -EOPNOTSUPP; - - /* Punch hole must have keep size set */ - if ((mode & FALLOC_FL_PUNCH_HOLE) && - !(mode & FALLOC_FL_KEEP_SIZE)) + /* + * Modes are exclusive, even if that is not obvious from the encoding + * as bit masks and the mix with the flag in the same namespace. + * + * To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is + * encoded as no bit set. + */ + switch (mode & FALLOC_FL_MODE_MASK) { + case FALLOC_FL_ALLOCATE_RANGE: + case FALLOC_FL_UNSHARE_RANGE: + case FALLOC_FL_ZERO_RANGE: + break; + case FALLOC_FL_PUNCH_HOLE: + if (!(mode & FALLOC_FL_KEEP_SIZE)) + return -EOPNOTSUPP; + break; + case FALLOC_FL_COLLAPSE_RANGE: + case FALLOC_FL_INSERT_RANGE: + if (mode & FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + break; + default: return -EOPNOTSUPP; - - /* Collapse range should only be used exclusively. */ - if ((mode & FALLOC_FL_COLLAPSE_RANGE) && - (mode & ~FALLOC_FL_COLLAPSE_RANGE)) - return -EINVAL; - - /* Insert range should only be used exclusively. */ - if ((mode & FALLOC_FL_INSERT_RANGE) && - (mode & ~FALLOC_FL_INSERT_RANGE)) - return -EINVAL; - - /* Unshare range should only be used with allocate mode. */ - if ((mode & FALLOC_FL_UNSHARE_RANGE) && - (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE))) - return -EINVAL; + } if (!(file->f_mode & FMODE_WRITE)) return -EBADF; /* - * We can only allow pure fallocate on append only files + * On append-only files only space preallocation is supported. */ if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode)) return -EPERM; @@ -319,8 +327,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -ENODEV; - /* Check for wrap through zero too */ - if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) + /* Check for wraparound */ + if (check_add_overflow(offset, len, &sum)) + return -EFBIG; + + if (sum > inode->i_sb->s_maxbytes) return -EFBIG; if (!file->f_op->fallocate) @@ -346,14 +357,12 @@ EXPORT_SYMBOL_GPL(vfs_fallocate); int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len) { - struct fd f = fdget(fd); - int error = -EBADF; + CLASS(fd, f)(fd); - if (f.file) { - error = vfs_fallocate(f.file, mode, offset, len); - fdput(f); - } - return error; + if (fd_empty(f)) + return -EBADF; + + return vfs_fallocate(fd_file(f), mode, offset, len); } SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) @@ -407,7 +416,6 @@ static bool access_need_override_creds(int flags) static const struct cred *access_override_creds(void) { - const struct cred *old_cred; struct cred *override_cred; override_cred = prepare_creds(); @@ -452,16 +460,10 @@ static const struct cred *access_override_creds(void) * freeing. */ override_cred->non_rcu = 1; - - old_cred = override_creds(override_cred); - - /* override_cred() gets its own ref */ - put_cred(override_cred); - - return old_cred; + return override_creds(override_cred); } -static long do_faccessat(int dfd, const char __user *filename, int mode, int flags) +static int do_faccessat(int dfd, const char __user *filename, int mode, int flags) { struct path path; struct inode *inode; @@ -528,7 +530,7 @@ out_path_release: } out: if (old_cred) - revert_creds(old_cred); + put_cred(revert_creds(old_cred)); return res; } @@ -577,23 +579,18 @@ out: SYSCALL_DEFINE1(fchdir, unsigned int, fd) { - struct fd f = fdget_raw(fd); + CLASS(fd_raw, f)(fd); int error; - error = -EBADF; - if (!f.file) - goto out; + if (fd_empty(f)) + return -EBADF; - error = -ENOTDIR; - if (!d_can_lookup(f.file->f_path.dentry)) - goto out_putf; + if (!d_can_lookup(fd_file(f)->f_path.dentry)) + return -ENOTDIR; - error = file_permission(f.file, MAY_EXEC | MAY_CHDIR); + error = file_permission(fd_file(f), MAY_EXEC | MAY_CHDIR); if (!error) - set_fs_pwd(current->fs, &f.file->f_path); -out_putf: - fdput(f); -out: + set_fs_pwd(current->fs, &fd_file(f)->f_path); return error; } @@ -641,7 +638,9 @@ int chmod_common(const struct path *path, umode_t mode) if (error) return error; retry_deleg: - inode_lock(inode); + error = inode_lock_killable(inode); + if (error) + goto out_mnt_unlock; error = security_path_chmod(path, mode); if (error) goto out_unlock; @@ -656,6 +655,7 @@ out_unlock: if (!error) goto retry_deleg; } +out_mnt_unlock: mnt_drop_write(path->mnt); return error; } @@ -668,14 +668,12 @@ int vfs_fchmod(struct file *file, umode_t mode) SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) { - struct fd f = fdget(fd); - int err = -EBADF; + CLASS(fd, f)(fd); - if (f.file) { - err = vfs_fchmod(f.file, mode); - fdput(f); - } - return err; + if (fd_empty(f)) + return -EBADF; + + return vfs_fchmod(fd_file(f), mode); } static int do_fchmodat(int dfd, const char __user *filename, umode_t mode, @@ -777,7 +775,9 @@ retry_deleg: return -EINVAL; if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid)) return -EINVAL; - inode_lock(inode); + error = inode_lock_killable(inode); + if (error) + return error; if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV | setattr_should_drop_sgid(idmap, inode); @@ -862,14 +862,12 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group) int ksys_fchown(unsigned int fd, uid_t user, gid_t group) { - struct fd f = fdget(fd); - int error = -EBADF; + CLASS(fd, f)(fd); - if (f.file) { - error = vfs_fchown(f.file, user, group); - fdput(f); - } - return error; + if (fd_empty(f)) + return -EBADF; + + return vfs_fchown(fd_file(f), user, group); } SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) @@ -902,10 +900,10 @@ cleanup_inode: } static int do_dentry_open(struct file *f, - struct inode *inode, int (*open)(struct inode *, struct file *)) { static const struct file_operations empty_fops = {}; + struct inode *inode = f->f_path.dentry->d_inode; int error; path_get(&f->f_path); @@ -916,6 +914,7 @@ static int do_dentry_open(struct file *f, if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH | FMODE_OPENED; + file_set_fsnotify_mode(f, FMODE_NONOTIFY); f->f_op = &empty_fops; return 0; } @@ -943,6 +942,16 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; + /* + * Set FMODE_NONOTIFY_* bits according to existing permission watches. + * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a + * pseudo file, this call will not change the mode. + */ + file_set_fsnotify_mode_from_watchers(f); + error = fsnotify_open_perm(f); + if (error) + goto cleanup_all; + error = break_lease(file_inode(f), f->f_flags); if (error) goto cleanup_all; @@ -982,12 +991,11 @@ static int do_dentry_open(struct file *f, */ if (f->f_mode & FMODE_WRITE) { /* - * Paired with smp_mb() in collapse_file() to ensure nr_thps - * is up to date and the update to i_writecount by - * get_write_access() is visible. Ensures subsequent insertion - * of THPs into the page cache will fail. + * Depends on full fence from get_write_access() to synchronize + * against collapse_file() regarding i_writecount and nr_thps + * updates. Ensures subsequent insertion of THPs into the page + * cache will fail. */ - smp_mb(); if (filemap_nr_thps(inode->i_mapping)) { struct address_space *mapping = inode->i_mapping; @@ -1004,11 +1012,6 @@ static int do_dentry_open(struct file *f, } } - /* - * Once we return a file with FMODE_OPENED, __fput() will call - * fsnotify_close(), so we need fsnotify_open() here for symmetry. - */ - fsnotify_open(f); return 0; cleanup_all: @@ -1047,7 +1050,7 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; - return do_dentry_open(file, d_backing_inode(dentry), open); + return do_dentry_open(file, open); } EXPORT_SYMBOL(finish_open); @@ -1085,8 +1088,19 @@ EXPORT_SYMBOL(file_path); */ int vfs_open(const struct path *path, struct file *file) { + int ret; + file->f_path = *path; - return do_dentry_open(file, d_backing_inode(path->dentry), NULL); + ret = do_dentry_open(file, NULL); + if (!ret) { + /* + * Once we return a file with FMODE_OPENED, __fput() will call + * fsnotify_close(), so we need fsnotify_open() here for + * symmetry. + */ + fsnotify_open(file); + } + return ret; } struct file *dentry_open(const struct path *path, int flags, @@ -1110,6 +1124,23 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred) +{ + struct file *f = alloc_empty_file(flags, cred); + if (!IS_ERR(f)) { + int error; + + file_set_fsnotify_mode(f, FMODE_NONOTIFY); + error = vfs_open(path, f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } + return f; +} + /** * dentry_create - Create and open a file * @path: path to create @@ -1155,7 +1186,6 @@ EXPORT_SYMBOL(dentry_create); * kernel_file_open - open a file for kernel internal use * @path: path of the file to open * @flags: open flags - * @inode: the inode * @cred: credentials for open * * Open a file for use by in-kernel consumers. The file is not accounted @@ -1165,7 +1195,7 @@ EXPORT_SYMBOL(dentry_create); * Return: Opened file on success, an error pointer on failure. */ struct file *kernel_file_open(const struct path *path, int flags, - struct inode *inode, const struct cred *cred) + const struct cred *cred) { struct file *f; int error; @@ -1175,11 +1205,13 @@ struct file *kernel_file_open(const struct path *path, int flags, return f; f->f_path = *path; - error = do_dentry_open(f, inode, NULL); + error = do_dentry_open(f, NULL); if (error) { fput(f); - f = ERR_PTR(error); + return ERR_PTR(error); } + + fsnotify_open(f); return f; } EXPORT_SYMBOL_GPL(kernel_file_open); @@ -1206,7 +1238,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) inline int build_open_flags(const struct open_how *how, struct open_flags *op) { u64 flags = how->flags; - u64 strip = __FMODE_NONOTIFY | O_CLOEXEC; + u64 strip = O_CLOEXEC; int lookup_flags = 0; int acc_mode = ACC_MODE(flags); @@ -1214,9 +1246,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) "struct open_flags doesn't yet handle flags > 32 bits"); /* - * Strip flags that either shouldn't be set by userspace like - * FMODE_NONOTIFY or that aren't relevant in determining struct - * open_flags like O_CLOEXEC. + * Strip flags that aren't relevant in determining struct open_flags. */ flags &= ~strip; @@ -1387,22 +1417,23 @@ struct file *file_open_root(const struct path *root, } EXPORT_SYMBOL(file_open_root); -static long do_sys_openat2(int dfd, const char __user *filename, - struct open_how *how) +static int do_sys_openat2(int dfd, const char __user *filename, + struct open_how *how) { struct open_flags op; - int fd = build_open_flags(how, &op); struct filename *tmp; + int err, fd; - if (fd) - return fd; + err = build_open_flags(how, &op); + if (unlikely(err)) + return err; tmp = getname(filename); if (IS_ERR(tmp)) return PTR_ERR(tmp); fd = get_unused_fd_flags(how->flags); - if (fd >= 0) { + if (likely(fd >= 0)) { struct file *f = do_filp_open(dfd, tmp, &op); if (IS_ERR(f)) { put_unused_fd(fd); @@ -1415,7 +1446,7 @@ static long do_sys_openat2(int dfd, const char __user *filename, return fd; } -long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) +int do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_how how = build_open_how(flags, mode); return do_sys_openat2(dfd, filename, &how); @@ -1448,6 +1479,8 @@ SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename, if (unlikely(usize < OPEN_HOW_SIZE_VER0)) return -EINVAL; + if (unlikely(usize > PAGE_SIZE)) + return -E2BIG; err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize); if (err) @@ -1506,7 +1539,7 @@ static int filp_flush(struct file *filp, fl_owner_t id) { int retval = 0; - if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, + if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, filp, "VFS: Close: file count is 0 (f_op=%ps)", filp->f_op)) { return 0; @@ -1527,7 +1560,7 @@ int filp_close(struct file *filp, fl_owner_t id) int retval; retval = filp_flush(filp, id); - fput(filp); + fput_close(filp); return retval; } @@ -1553,35 +1586,21 @@ SYSCALL_DEFINE1(close, unsigned int, fd) * We're returning to user space. Don't bother * with any delayed fput() cases. */ - __fput_sync(file); + fput_close_sync(file); + + if (likely(retval == 0)) + return 0; /* can't restart close syscall because file table entry was cleared */ - if (unlikely(retval == -ERESTARTSYS || - retval == -ERESTARTNOINTR || - retval == -ERESTARTNOHAND || - retval == -ERESTART_RESTARTBLOCK)) + if (retval == -ERESTARTSYS || + retval == -ERESTARTNOINTR || + retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK) retval = -EINTR; return retval; } -/** - * sys_close_range() - Close all file descriptors in a given range. - * - * @fd: starting file descriptor to close - * @max_fd: last file descriptor to close - * @flags: reserved for future extensions - * - * This closes a range of file descriptors. All file descriptors - * from @fd up to and including @max_fd are closed. - * Currently, errors to close a given file descriptor are ignored. - */ -SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd, - unsigned int, flags) -{ - return __close_range(fd, max_fd, flags); -} - /* * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. |