summaryrefslogtreecommitdiff
path: root/fs/open.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/open.c')
-rw-r--r--fs/open.c325
1 files changed, 172 insertions, 153 deletions
diff --git a/fs/open.c b/fs/open.c
index ee8460c83c77..7828234a7caa 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -60,18 +60,21 @@ int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
if (ret)
newattrs.ia_valid |= ret | ATTR_FORCE;
- inode_lock(dentry->d_inode);
+ ret = inode_lock_killable(dentry->d_inode);
+ if (ret)
+ return ret;
+
/* Note any delegations or leases have already been broken: */
ret = notify_change(idmap, dentry, &newattrs, NULL);
inode_unlock(dentry->d_inode);
return ret;
}
-long vfs_truncate(const struct path *path, loff_t length)
+int vfs_truncate(const struct path *path, loff_t length)
{
struct mnt_idmap *idmap;
struct inode *inode;
- long error;
+ int error;
inode = path->dentry->d_inode;
@@ -81,14 +84,18 @@ long vfs_truncate(const struct path *path, loff_t length)
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- error = mnt_want_write(path->mnt);
- if (error)
- goto out;
-
idmap = mnt_idmap(path->mnt);
error = inode_permission(idmap, inode, MAY_WRITE);
if (error)
- goto mnt_drop_write_and_out;
+ return error;
+
+ error = fsnotify_truncate_perm(path, length);
+ if (error)
+ return error;
+
+ error = mnt_want_write(path->mnt);
+ if (error)
+ return error;
error = -EPERM;
if (IS_APPEND(inode))
@@ -114,12 +121,12 @@ put_write_and_out:
put_write_access(inode);
mnt_drop_write_and_out:
mnt_drop_write(path->mnt);
-out:
+
return error;
}
EXPORT_SYMBOL_GPL(vfs_truncate);
-long do_sys_truncate(const char __user *pathname, loff_t length)
+int do_sys_truncate(const char __user *pathname, loff_t length)
{
unsigned int lookup_flags = LOOKUP_FOLLOW;
struct path path;
@@ -153,7 +160,7 @@ COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length
}
#endif
-long do_ftruncate(struct file *file, loff_t length, int small)
+int do_ftruncate(struct file *file, loff_t length, int small)
{
struct inode *inode;
struct dentry *dentry;
@@ -175,40 +182,41 @@ long do_ftruncate(struct file *file, loff_t length, int small)
/* Check IS_APPEND on real upper inode */
if (IS_APPEND(file_inode(file)))
return -EPERM;
- sb_start_write(inode->i_sb);
+
error = security_file_truncate(file);
- if (!error)
- error = do_truncate(file_mnt_idmap(file), dentry, length,
- ATTR_MTIME | ATTR_CTIME, file);
+ if (error)
+ return error;
+
+ error = fsnotify_truncate_perm(&file->f_path, length);
+ if (error)
+ return error;
+
+ sb_start_write(inode->i_sb);
+ error = do_truncate(file_mnt_idmap(file), dentry, length,
+ ATTR_MTIME | ATTR_CTIME, file);
sb_end_write(inode->i_sb);
return error;
}
-long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+int do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
- struct fd f;
- int error;
-
if (length < 0)
return -EINVAL;
- f = fdget(fd);
- if (!f.file)
+ CLASS(fd, f)(fd);
+ if (fd_empty(f))
return -EBADF;
- error = do_ftruncate(f.file, length, small);
-
- fdput(f);
- return error;
+ return do_ftruncate(fd_file(f), length, small);
}
-SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
+SYSCALL_DEFINE2(ftruncate, unsigned int, fd, off_t, length)
{
return do_sys_ftruncate(fd, length, 1);
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
+COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_off_t, length)
{
return do_sys_ftruncate(fd, length, 1);
}
@@ -246,45 +254,45 @@ COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd,
int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
- long ret;
+ int ret;
+ loff_t sum;
if (offset < 0 || len <= 0)
return -EINVAL;
- /* Return error if mode is not supported */
- if (mode & ~FALLOC_FL_SUPPORTED_MASK)
+ if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
- /* Punch hole and zero range are mutually exclusive */
- if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
- (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
- return -EOPNOTSUPP;
-
- /* Punch hole must have keep size set */
- if ((mode & FALLOC_FL_PUNCH_HOLE) &&
- !(mode & FALLOC_FL_KEEP_SIZE))
+ /*
+ * Modes are exclusive, even if that is not obvious from the encoding
+ * as bit masks and the mix with the flag in the same namespace.
+ *
+ * To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is
+ * encoded as no bit set.
+ */
+ switch (mode & FALLOC_FL_MODE_MASK) {
+ case FALLOC_FL_ALLOCATE_RANGE:
+ case FALLOC_FL_UNSHARE_RANGE:
+ case FALLOC_FL_ZERO_RANGE:
+ break;
+ case FALLOC_FL_PUNCH_HOLE:
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ return -EOPNOTSUPP;
+ break;
+ case FALLOC_FL_COLLAPSE_RANGE:
+ case FALLOC_FL_INSERT_RANGE:
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+ break;
+ default:
return -EOPNOTSUPP;
-
- /* Collapse range should only be used exclusively. */
- if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
- (mode & ~FALLOC_FL_COLLAPSE_RANGE))
- return -EINVAL;
-
- /* Insert range should only be used exclusively. */
- if ((mode & FALLOC_FL_INSERT_RANGE) &&
- (mode & ~FALLOC_FL_INSERT_RANGE))
- return -EINVAL;
-
- /* Unshare range should only be used with allocate mode. */
- if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
- (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
- return -EINVAL;
+ }
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
/*
- * We can only allow pure fallocate on append only files
+ * On append-only files only space preallocation is supported.
*/
if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
return -EPERM;
@@ -319,8 +327,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
return -ENODEV;
- /* Check for wrap through zero too */
- if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
+ /* Check for wraparound */
+ if (check_add_overflow(offset, len, &sum))
+ return -EFBIG;
+
+ if (sum > inode->i_sb->s_maxbytes)
return -EFBIG;
if (!file->f_op->fallocate)
@@ -346,14 +357,12 @@ EXPORT_SYMBOL_GPL(vfs_fallocate);
int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
{
- struct fd f = fdget(fd);
- int error = -EBADF;
+ CLASS(fd, f)(fd);
- if (f.file) {
- error = vfs_fallocate(f.file, mode, offset, len);
- fdput(f);
- }
- return error;
+ if (fd_empty(f))
+ return -EBADF;
+
+ return vfs_fallocate(fd_file(f), mode, offset, len);
}
SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
@@ -407,7 +416,6 @@ static bool access_need_override_creds(int flags)
static const struct cred *access_override_creds(void)
{
- const struct cred *old_cred;
struct cred *override_cred;
override_cred = prepare_creds();
@@ -452,16 +460,10 @@ static const struct cred *access_override_creds(void)
* freeing.
*/
override_cred->non_rcu = 1;
-
- old_cred = override_creds(override_cred);
-
- /* override_cred() gets its own ref */
- put_cred(override_cred);
-
- return old_cred;
+ return override_creds(override_cred);
}
-static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
+static int do_faccessat(int dfd, const char __user *filename, int mode, int flags)
{
struct path path;
struct inode *inode;
@@ -528,7 +530,7 @@ out_path_release:
}
out:
if (old_cred)
- revert_creds(old_cred);
+ put_cred(revert_creds(old_cred));
return res;
}
@@ -577,23 +579,18 @@ out:
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
- struct fd f = fdget_raw(fd);
+ CLASS(fd_raw, f)(fd);
int error;
- error = -EBADF;
- if (!f.file)
- goto out;
+ if (fd_empty(f))
+ return -EBADF;
- error = -ENOTDIR;
- if (!d_can_lookup(f.file->f_path.dentry))
- goto out_putf;
+ if (!d_can_lookup(fd_file(f)->f_path.dentry))
+ return -ENOTDIR;
- error = file_permission(f.file, MAY_EXEC | MAY_CHDIR);
+ error = file_permission(fd_file(f), MAY_EXEC | MAY_CHDIR);
if (!error)
- set_fs_pwd(current->fs, &f.file->f_path);
-out_putf:
- fdput(f);
-out:
+ set_fs_pwd(current->fs, &fd_file(f)->f_path);
return error;
}
@@ -641,7 +638,9 @@ int chmod_common(const struct path *path, umode_t mode)
if (error)
return error;
retry_deleg:
- inode_lock(inode);
+ error = inode_lock_killable(inode);
+ if (error)
+ goto out_mnt_unlock;
error = security_path_chmod(path, mode);
if (error)
goto out_unlock;
@@ -656,6 +655,7 @@ out_unlock:
if (!error)
goto retry_deleg;
}
+out_mnt_unlock:
mnt_drop_write(path->mnt);
return error;
}
@@ -668,14 +668,12 @@ int vfs_fchmod(struct file *file, umode_t mode)
SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
{
- struct fd f = fdget(fd);
- int err = -EBADF;
+ CLASS(fd, f)(fd);
- if (f.file) {
- err = vfs_fchmod(f.file, mode);
- fdput(f);
- }
- return err;
+ if (fd_empty(f))
+ return -EBADF;
+
+ return vfs_fchmod(fd_file(f), mode);
}
static int do_fchmodat(int dfd, const char __user *filename, umode_t mode,
@@ -777,7 +775,9 @@ retry_deleg:
return -EINVAL;
if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
return -EINVAL;
- inode_lock(inode);
+ error = inode_lock_killable(inode);
+ if (error)
+ return error;
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
setattr_should_drop_sgid(idmap, inode);
@@ -862,14 +862,12 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group)
int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
{
- struct fd f = fdget(fd);
- int error = -EBADF;
+ CLASS(fd, f)(fd);
- if (f.file) {
- error = vfs_fchown(f.file, user, group);
- fdput(f);
- }
- return error;
+ if (fd_empty(f))
+ return -EBADF;
+
+ return vfs_fchown(fd_file(f), user, group);
}
SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
@@ -902,10 +900,10 @@ cleanup_inode:
}
static int do_dentry_open(struct file *f,
- struct inode *inode,
int (*open)(struct inode *, struct file *))
{
static const struct file_operations empty_fops = {};
+ struct inode *inode = f->f_path.dentry->d_inode;
int error;
path_get(&f->f_path);
@@ -916,6 +914,7 @@ static int do_dentry_open(struct file *f,
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH | FMODE_OPENED;
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY);
f->f_op = &empty_fops;
return 0;
}
@@ -943,6 +942,16 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
+ /*
+ * Set FMODE_NONOTIFY_* bits according to existing permission watches.
+ * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a
+ * pseudo file, this call will not change the mode.
+ */
+ file_set_fsnotify_mode_from_watchers(f);
+ error = fsnotify_open_perm(f);
+ if (error)
+ goto cleanup_all;
+
error = break_lease(file_inode(f), f->f_flags);
if (error)
goto cleanup_all;
@@ -982,12 +991,11 @@ static int do_dentry_open(struct file *f,
*/
if (f->f_mode & FMODE_WRITE) {
/*
- * Paired with smp_mb() in collapse_file() to ensure nr_thps
- * is up to date and the update to i_writecount by
- * get_write_access() is visible. Ensures subsequent insertion
- * of THPs into the page cache will fail.
+ * Depends on full fence from get_write_access() to synchronize
+ * against collapse_file() regarding i_writecount and nr_thps
+ * updates. Ensures subsequent insertion of THPs into the page
+ * cache will fail.
*/
- smp_mb();
if (filemap_nr_thps(inode->i_mapping)) {
struct address_space *mapping = inode->i_mapping;
@@ -1004,11 +1012,6 @@ static int do_dentry_open(struct file *f,
}
}
- /*
- * Once we return a file with FMODE_OPENED, __fput() will call
- * fsnotify_close(), so we need fsnotify_open() here for symmetry.
- */
- fsnotify_open(f);
return 0;
cleanup_all:
@@ -1047,7 +1050,7 @@ int finish_open(struct file *file, struct dentry *dentry,
BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
file->f_path.dentry = dentry;
- return do_dentry_open(file, d_backing_inode(dentry), open);
+ return do_dentry_open(file, open);
}
EXPORT_SYMBOL(finish_open);
@@ -1085,8 +1088,19 @@ EXPORT_SYMBOL(file_path);
*/
int vfs_open(const struct path *path, struct file *file)
{
+ int ret;
+
file->f_path = *path;
- return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
+ ret = do_dentry_open(file, NULL);
+ if (!ret) {
+ /*
+ * Once we return a file with FMODE_OPENED, __fput() will call
+ * fsnotify_close(), so we need fsnotify_open() here for
+ * symmetry.
+ */
+ fsnotify_open(file);
+ }
+ return ret;
}
struct file *dentry_open(const struct path *path, int flags,
@@ -1110,6 +1124,23 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
+struct file *dentry_open_nonotify(const struct path *path, int flags,
+ const struct cred *cred)
+{
+ struct file *f = alloc_empty_file(flags, cred);
+ if (!IS_ERR(f)) {
+ int error;
+
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY);
+ error = vfs_open(path, f);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+ }
+ return f;
+}
+
/**
* dentry_create - Create and open a file
* @path: path to create
@@ -1155,7 +1186,6 @@ EXPORT_SYMBOL(dentry_create);
* kernel_file_open - open a file for kernel internal use
* @path: path of the file to open
* @flags: open flags
- * @inode: the inode
* @cred: credentials for open
*
* Open a file for use by in-kernel consumers. The file is not accounted
@@ -1165,7 +1195,7 @@ EXPORT_SYMBOL(dentry_create);
* Return: Opened file on success, an error pointer on failure.
*/
struct file *kernel_file_open(const struct path *path, int flags,
- struct inode *inode, const struct cred *cred)
+ const struct cred *cred)
{
struct file *f;
int error;
@@ -1175,11 +1205,13 @@ struct file *kernel_file_open(const struct path *path, int flags,
return f;
f->f_path = *path;
- error = do_dentry_open(f, inode, NULL);
+ error = do_dentry_open(f, NULL);
if (error) {
fput(f);
- f = ERR_PTR(error);
+ return ERR_PTR(error);
}
+
+ fsnotify_open(f);
return f;
}
EXPORT_SYMBOL_GPL(kernel_file_open);
@@ -1206,7 +1238,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
inline int build_open_flags(const struct open_how *how, struct open_flags *op)
{
u64 flags = how->flags;
- u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
+ u64 strip = O_CLOEXEC;
int lookup_flags = 0;
int acc_mode = ACC_MODE(flags);
@@ -1214,9 +1246,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
"struct open_flags doesn't yet handle flags > 32 bits");
/*
- * Strip flags that either shouldn't be set by userspace like
- * FMODE_NONOTIFY or that aren't relevant in determining struct
- * open_flags like O_CLOEXEC.
+ * Strip flags that aren't relevant in determining struct open_flags.
*/
flags &= ~strip;
@@ -1387,22 +1417,23 @@ struct file *file_open_root(const struct path *root,
}
EXPORT_SYMBOL(file_open_root);
-static long do_sys_openat2(int dfd, const char __user *filename,
- struct open_how *how)
+static int do_sys_openat2(int dfd, const char __user *filename,
+ struct open_how *how)
{
struct open_flags op;
- int fd = build_open_flags(how, &op);
struct filename *tmp;
+ int err, fd;
- if (fd)
- return fd;
+ err = build_open_flags(how, &op);
+ if (unlikely(err))
+ return err;
tmp = getname(filename);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
fd = get_unused_fd_flags(how->flags);
- if (fd >= 0) {
+ if (likely(fd >= 0)) {
struct file *f = do_filp_open(dfd, tmp, &op);
if (IS_ERR(f)) {
put_unused_fd(fd);
@@ -1415,7 +1446,7 @@ static long do_sys_openat2(int dfd, const char __user *filename,
return fd;
}
-long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
+int do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_how how = build_open_how(flags, mode);
return do_sys_openat2(dfd, filename, &how);
@@ -1448,6 +1479,8 @@ SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
if (unlikely(usize < OPEN_HOW_SIZE_VER0))
return -EINVAL;
+ if (unlikely(usize > PAGE_SIZE))
+ return -E2BIG;
err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
if (err)
@@ -1506,7 +1539,7 @@ static int filp_flush(struct file *filp, fl_owner_t id)
{
int retval = 0;
- if (CHECK_DATA_CORRUPTION(file_count(filp) == 0,
+ if (CHECK_DATA_CORRUPTION(file_count(filp) == 0, filp,
"VFS: Close: file count is 0 (f_op=%ps)",
filp->f_op)) {
return 0;
@@ -1527,7 +1560,7 @@ int filp_close(struct file *filp, fl_owner_t id)
int retval;
retval = filp_flush(filp, id);
- fput(filp);
+ fput_close(filp);
return retval;
}
@@ -1553,35 +1586,21 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
* We're returning to user space. Don't bother
* with any delayed fput() cases.
*/
- __fput_sync(file);
+ fput_close_sync(file);
+
+ if (likely(retval == 0))
+ return 0;
/* can't restart close syscall because file table entry was cleared */
- if (unlikely(retval == -ERESTARTSYS ||
- retval == -ERESTARTNOINTR ||
- retval == -ERESTARTNOHAND ||
- retval == -ERESTART_RESTARTBLOCK))
+ if (retval == -ERESTARTSYS ||
+ retval == -ERESTARTNOINTR ||
+ retval == -ERESTARTNOHAND ||
+ retval == -ERESTART_RESTARTBLOCK)
retval = -EINTR;
return retval;
}
-/**
- * sys_close_range() - Close all file descriptors in a given range.
- *
- * @fd: starting file descriptor to close
- * @max_fd: last file descriptor to close
- * @flags: reserved for future extensions
- *
- * This closes a range of file descriptors. All file descriptors
- * from @fd up to and including @max_fd are closed.
- * Currently, errors to close a given file descriptor are ignored.
- */
-SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
- unsigned int, flags)
-{
- return __close_range(fd, max_fd, flags);
-}
-
/*
* This routine simulates a hangup on the tty, to arrange that users
* are given clean terminals at login time.