diff options
author | Ingo Molnar <mingo@kernel.org> | 2020-01-20 08:43:44 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2020-01-20 08:43:44 +0100 |
commit | cb6c82df684e912b10245c13200ef09c9d372fc2 (patch) | |
tree | 98448cea990ecf17ad39798980ab18165b72b7ac /fs | |
parent | 5738891229a25e9e678122a843cbf0466a456d0c (diff) | |
parent | def9d2780727cec3313ed3522d0123158d87224d (diff) |
Merge tag 'v5.5-rc7' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/dir.c | 18 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 7 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 79 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 14 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 6 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 51 | ||||
-rw-r--r-- | fs/btrfs/root-tree.c | 10 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 6 | ||||
-rw-r--r-- | fs/buffer.c | 33 | ||||
-rw-r--r-- | fs/char_dev.c | 2 | ||||
-rw-r--r-- | fs/cifs/cifsglob.h | 1 | ||||
-rw-r--r-- | fs/cifs/readdir.c | 63 | ||||
-rw-r--r-- | fs/cifs/smb2file.c | 2 | ||||
-rw-r--r-- | fs/direct-io.c | 2 | ||||
-rw-r--r-- | fs/file.c | 7 | ||||
-rw-r--r-- | fs/fuse/file.c | 4 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 4 | ||||
-rw-r--r-- | fs/internal.h | 2 | ||||
-rw-r--r-- | fs/io-wq.c | 22 | ||||
-rw-r--r-- | fs/io_uring.c | 774 | ||||
-rw-r--r-- | fs/locks.c | 2 | ||||
-rw-r--r-- | fs/mpage.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 90 | ||||
-rw-r--r-- | fs/namespace.c | 2 | ||||
-rw-r--r-- | fs/nfs/nfstrace.h | 2 | ||||
-rw-r--r-- | fs/nsfs.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 1 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 8 | ||||
-rw-r--r-- | fs/posix_acl.c | 7 | ||||
-rw-r--r-- | fs/pstore/ram.c | 13 | ||||
-rw-r--r-- | fs/pstore/ram_core.c | 2 |
31 files changed, 668 insertions, 571 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 497f979018c2..5c794f4b051a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -908,6 +908,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_fid fid = {}; struct inode *inode; struct dentry *d; struct key *key; @@ -951,21 +952,18 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, afs_stat_v(dvnode, n_lookup); inode = afs_do_lookup(dir, dentry, key); key_put(key); - if (inode == ERR_PTR(-ENOENT)) { + if (inode == ERR_PTR(-ENOENT)) inode = afs_try_auto_mntpt(dentry, dir); - } else { - dentry->d_fsdata = - (void *)(unsigned long)dvnode->status.data_version; - } + + if (!IS_ERR_OR_NULL(inode)) + fid = AFS_FS_I(inode)->fid; + d = d_splice_alias(inode, dentry); if (!IS_ERR_OR_NULL(d)) { d->d_fsdata = dentry->d_fsdata; - trace_afs_lookup(dvnode, &d->d_name, - inode ? AFS_FS_I(inode) : NULL); + trace_afs_lookup(dvnode, &d->d_name, &fid); } else { - trace_afs_lookup(dvnode, &dentry->d_name, - IS_ERR_OR_NULL(inode) ? NULL - : AFS_FS_I(inode)); + trace_afs_lookup(dvnode, &dentry->d_name, &fid); } return d; } diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ee834ef7beb4..43e1660f450f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -447,7 +447,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, if (blkcg_css) { bio->bi_opf |= REQ_CGROUP_PUNT; - bio_associate_blkg_from_css(bio, blkcg_css); + kthread_associate_blkcg(blkcg_css); } refcount_set(&cb->pending_bios, 1); @@ -491,6 +491,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; + if (blkcg_css) + bio->bi_opf |= REQ_CGROUP_PUNT; bio_add_page(bio, page, PAGE_SIZE, 0); } if (bytes_left < PAGE_SIZE) { @@ -517,6 +519,9 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_endio(bio); } + if (blkcg_css) + kthread_associate_blkcg(NULL); + return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e3c76645cad7..c70baafb2a39 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1479,10 +1479,10 @@ next_slot: disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); /* - * If extent we got ends before our range starts, skip - * to next extent + * If the extent we got ends before our current offset, + * skip to the next extent. */ - if (extent_end <= start) { + if (extent_end <= cur_offset) { path->slots[0]++; goto next_slot; } @@ -4238,18 +4238,30 @@ out: } static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, - struct inode *dir, u64 objectid, - const char *name, int name_len) + struct inode *dir, struct dentry *dentry) { struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_dir_item *di; struct btrfs_key key; + const char *name = dentry->d_name.name; + int name_len = dentry->d_name.len; u64 index; int ret; + u64 objectid; u64 dir_ino = btrfs_ino(BTRFS_I(dir)); + if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) { + objectid = inode->root->root_key.objectid; + } else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { + objectid = inode->location.objectid; + } else { + WARN_ON(1); + return -EINVAL; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -4271,13 +4283,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid, - dir_ino, &index, name, name_len); - if (ret < 0) { - if (ret != -ENOENT) { - btrfs_abort_transaction(trans, ret); - goto out; - } + /* + * This is a placeholder inode for a subvolume we didn't have a + * reference to at the time of the snapshot creation. In the meantime + * we could have renamed the real subvol link into our snapshot, so + * depending on btrfs_del_root_ref to return -ENOENT here is incorret. + * Instead simply lookup the dir_index_item for this entry so we can + * remove it. Otherwise we know we have a ref to the root and we can + * call btrfs_del_root_ref, and it _shouldn't_ fail. + */ + if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { di = btrfs_search_dir_index_item(root, path, dir_ino, name, name_len); if (IS_ERR_OR_NULL(di)) { @@ -4292,8 +4307,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); index = key.offset; + btrfs_release_path(path); + } else { + ret = btrfs_del_root_ref(trans, objectid, + root->root_key.objectid, dir_ino, + &index, name, name_len); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } } - btrfs_release_path(path); ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index); if (ret) { @@ -4487,8 +4510,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); - ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid, - dentry->d_name.name, dentry->d_name.len); + ret = btrfs_unlink_subvol(trans, dir, dentry); if (ret) { err = ret; btrfs_abort_transaction(trans, ret); @@ -4583,10 +4605,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return PTR_ERR(trans); if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { - err = btrfs_unlink_subvol(trans, dir, - BTRFS_I(inode)->location.objectid, - dentry->d_name.name, - dentry->d_name.len); + err = btrfs_unlink_subvol(trans, dir, dentry); goto out; } @@ -9536,7 +9555,6 @@ static int btrfs_rename_exchange(struct inode *old_dir, u64 new_ino = btrfs_ino(BTRFS_I(new_inode)); u64 old_idx = 0; u64 new_idx = 0; - u64 root_objectid; int ret; bool root_log_pinned = false; bool dest_log_pinned = false; @@ -9642,10 +9660,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* src is a subvolume */ if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { - root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; - ret = btrfs_unlink_subvol(trans, old_dir, root_objectid, - old_dentry->d_name.name, - old_dentry->d_name.len); + ret = btrfs_unlink_subvol(trans, old_dir, old_dentry); } else { /* src is an inode */ ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir), BTRFS_I(old_dentry->d_inode), @@ -9661,10 +9676,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, /* dest is a subvolume */ if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { - root_objectid = BTRFS_I(new_inode)->root->root_key.objectid; - ret = btrfs_unlink_subvol(trans, new_dir, root_objectid, - new_dentry->d_name.name, - new_dentry->d_name.len); + ret = btrfs_unlink_subvol(trans, new_dir, new_dentry); } else { /* dest is an inode */ ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir), BTRFS_I(new_dentry->d_inode), @@ -9862,7 +9874,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_inode = d_inode(new_dentry); struct inode *old_inode = d_inode(old_dentry); u64 index = 0; - u64 root_objectid; int ret; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); bool log_pinned = false; @@ -9970,10 +9981,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, BTRFS_I(old_inode), 1); if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { - root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; - ret = btrfs_unlink_subvol(trans, old_dir, root_objectid, - old_dentry->d_name.name, - old_dentry->d_name.len); + ret = btrfs_unlink_subvol(trans, old_dir, old_dentry); } else { ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir), BTRFS_I(d_inode(old_dentry)), @@ -9992,10 +10000,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode->i_ctime = current_time(new_inode); if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { - root_objectid = BTRFS_I(new_inode)->location.objectid; - ret = btrfs_unlink_subvol(trans, new_dir, root_objectid, - new_dentry->d_name.name, - new_dentry->d_name.len); + ret = btrfs_unlink_subvol(trans, new_dir, new_dentry); BUG_ON(new_inode->i_nlink == 0); } else { ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir), diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 18e328ce4b54..12ae31e1813e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4252,7 +4252,19 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg) &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, 0); - if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa))) + /* + * Copy scrub args to user space even if btrfs_scrub_dev() returned an + * error. This is important as it allows user space to know how much + * progress scrub has done. For example, if scrub is canceled we get + * -ECANCELED from btrfs_scrub_dev() and return that error back to user + * space. Later user space can inspect the progress from the structure + * btrfs_ioctl_scrub_args and resume scrub from where it left off + * previously (btrfs-progs does this). + * If we fail to copy the btrfs_ioctl_scrub_args structure to user space + * then return -EFAULT to signal the structure was not copied or it may + * be corrupt and unreliable due to a partial copy. + */ + if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; if (!(sa->flags & BTRFS_SCRUB_READONLY)) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d4282e12f2a6..39fc8c3d3a75 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2423,8 +2423,12 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 nr_old_roots = 0; int ret = 0; + /* + * If quotas get disabled meanwhile, the resouces need to be freed and + * we can't just exit here. + */ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) - return 0; + goto out_free; if (new_roots) { if (!maybe_fs_roots(new_roots)) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c58245797f30..da5abd62db22 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -517,6 +517,34 @@ static int update_backref_cache(struct btrfs_trans_handle *trans, return 1; } +static bool reloc_root_is_dead(struct btrfs_root *root) +{ + /* + * Pair with set_bit/clear_bit in clean_dirty_subvols and + * btrfs_update_reloc_root. We need to see the updated bit before + * trying to access reloc_root + */ + smp_rmb(); + if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)) + return true; + return false; +} + +/* + * Check if this subvolume tree has valid reloc tree. + * + * Reloc tree after swap is considered dead, thus not considered as valid. + * This is enough for most callers, as they don't distinguish dead reloc root + * from no reloc root. But should_ignore_root() below is a special case. + */ +static bool have_reloc_root(struct btrfs_root *root) +{ + if (reloc_root_is_dead(root)) + return false; + if (!root->reloc_root) + return false; + return true; +} static int should_ignore_root(struct btrfs_root *root) { @@ -525,6 +553,10 @@ static int should_ignore_root(struct btrfs_root *root) if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) return 0; + /* This root has been merged with its reloc tree, we can ignore it */ + if (reloc_root_is_dead(root)) + return 1; + reloc_root = root->reloc_root; if (!reloc_root) return 0; @@ -1439,7 +1471,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, * The subvolume has reloc tree but the swap is finished, no need to * create/update the dead reloc tree */ - if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)) + if (reloc_root_is_dead(root)) return 0; if (root->reloc_root) { @@ -1478,8 +1510,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root_item *root_item; int ret; - if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state) || - !root->reloc_root) + if (!have_reloc_root(root)) goto out; reloc_root = root->reloc_root; @@ -1489,6 +1520,11 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, if (fs_info->reloc_ctl->merge_reloc_tree && btrfs_root_refs(root_item) == 0) { set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); + /* + * Mark the tree as dead before we change reloc_root so + * have_reloc_root will not touch it from now on. + */ + smp_wmb(); __del_reloc_root(reloc_root); } @@ -2201,6 +2237,11 @@ static int clean_dirty_subvols(struct reloc_control *rc) if (ret2 < 0 && !ret) ret = ret2; } + /* + * Need barrier to ensure clear_bit() only happens after + * root->reloc_root = NULL. Pairs with have_reloc_root. + */ + smp_wmb(); clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); btrfs_put_fs_root(root); } else { @@ -4718,7 +4759,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, struct btrfs_root *root = pending->root; struct reloc_control *rc = root->fs_info->reloc_ctl; - if (!root->reloc_root || !rc) + if (!rc || !have_reloc_root(root)) return; if (!rc->merge_reloc_tree) @@ -4752,7 +4793,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, struct reloc_control *rc = root->fs_info->reloc_ctl; int ret; - if (!root->reloc_root || !rc) + if (!rc || !have_reloc_root(root)) return 0; rc = root->fs_info->reloc_ctl; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 3b17b647d002..612411c74550 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -376,11 +376,13 @@ again: leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); - - WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); - WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); ptr = (unsigned long)(ref + 1); - WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); + if ((btrfs_root_ref_dirid(leaf, ref) != dirid) || + (btrfs_root_ref_name_len(leaf, ref) != name_len) || + memcmp_extent_buffer(leaf, name, ptr, name_len)) { + err = -ENOENT; + goto out; + } *sequence = btrfs_root_ref_sequence(leaf, ref); ret = btrfs_del_item(trans, tree_root, path); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6d3f08bfff3..9b78e720c697 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3881,7 +3881,11 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, } } - num_devices = btrfs_num_devices(fs_info); + /* + * rw_devices will not change at the moment, device add/delete/replace + * are excluded by EXCL_OP + */ + num_devices = fs_info->fs_devices->rw_devices; /* * SINGLE profile on-disk has no profile bit, but in-memory we have a diff --git a/fs/buffer.c b/fs/buffer.c index d8c7242426bb..18a87ec8a465 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3031,11 +3031,9 @@ static void end_bio_bh_io_sync(struct bio *bio) * errors, this only handles the "we need to be able to * do IO at the final sector" case. */ -void guard_bio_eod(int op, struct bio *bio) +void guard_bio_eod(struct bio *bio) { sector_t maxsector; - struct bio_vec *bvec = bio_last_bvec_all(bio); - unsigned truncated_bytes; struct hd_struct *part; rcu_read_lock(); @@ -3061,28 +3059,7 @@ void guard_bio_eod(int op, struct bio *bio) if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) return; - /* Uhhuh. We've got a bio that straddles the device size! */ - truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9); - - /* - * The bio contains more than one segment which spans EOD, just return - * and let IO layer turn it into an EIO - */ - if (truncated_bytes > bvec->bv_len) - return; - - /* Truncate the bio.. */ - bio->bi_iter.bi_size -= truncated_bytes; - bvec->bv_len -= truncated_bytes; - - /* ..and clear the end of the buffer for reads */ - if (op == REQ_OP_READ) { - struct bio_vec bv; - - mp_bvec_last_segment(bvec, &bv); - zero_user(bv.bv_page, bv.bv_offset + bv.bv_len, - truncated_bytes); - } + bio_truncate(bio, maxsector << 9); } static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, @@ -3118,15 +3095,15 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; - /* Take care of bh's that straddle the end of the device */ - guard_bio_eod(op, bio); - if (buffer_meta(bh)) op_flags |= REQ_META; if (buffer_prio(bh)) op_flags |= REQ_PRIO; bio_set_op_attrs(bio, op, op_flags); + /* Take care of bh's that straddle the end of the device */ + guard_bio_eod(bio); + if (wbc) { wbc_init_bio(wbc, bio); wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size); diff --git a/fs/char_dev.c b/fs/char_dev.c index 00dfe17871ac..c5e6eff5a381 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -352,7 +352,7 @@ static struct kobject *cdev_get(struct cdev *p) if (owner && !try_module_get(owner)) return NULL; - kobj = kobject_get(&p->kobj); + kobj = kobject_get_unless_zero(&p->kobj); if (!kobj) module_put(owner); return kobj; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ce9bac756c2a..40705e862451 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1693,6 +1693,7 @@ struct cifs_fattr { struct timespec64 cf_atime; struct timespec64 cf_mtime; struct timespec64 cf_ctime; + u32 cf_cifstag; }; static inline void free_dfs_info_param(struct dfs_info3_param *param) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 3925a7bfc74d..d17587c2c4ab 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -139,6 +139,28 @@ retry: dput(dentry); } +static bool reparse_file_needs_reval(const struct cifs_fattr *fattr) +{ + if (!(fattr->cf_cifsattrs & ATTR_REPARSE)) + return false; + /* + * The DFS tags should be only intepreted by server side as per + * MS-FSCC 2.1.2.1, but let's include them anyway. + * + * Besides, if cf_cifstag is unset (0), then we still need it to be + * revalidated to know exactly what reparse point it is. + */ + switch (fattr->cf_cifstag) { + case IO_REPARSE_TAG_DFS: + case IO_REPARSE_TAG_DFSR: + case IO_REPARSE_TAG_SYMLINK: + case IO_REPARSE_TAG_NFS: + case 0: + return true; + } + return false; +} + static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { @@ -158,7 +180,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * is a symbolic link, DFS referral or a reparse point with a direct * access like junctions, deduplicated files, NFS symlinks. */ - if (fattr->cf_cifsattrs & ATTR_REPARSE) + if (reparse_file_needs_reval(fattr)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; /* non-unix readdir doesn't provide nlink */ @@ -194,19 +216,37 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) } } +static void __dir_info_to_fattr(struct cifs_fattr *fattr, const void *info) +{ + const FILE_DIRECTORY_INFO *fi = info; + + memset(fattr, 0, sizeof(*fattr)); + fattr->cf_cifsattrs = le32_to_cpu(fi->ExtFileAttributes); + fattr->cf_eof = le64_to_cpu(fi->EndOfFile); + fattr->cf_bytes = le64_to_cpu(fi->AllocationSize); + fattr->cf_createtime = le64_to_cpu(fi->CreationTime); + fattr->cf_atime = cifs_NTtimeToUnix(fi->LastAccessTime); + fattr->cf_ctime = cifs_NTtimeToUnix(fi->ChangeTime); + fattr->cf_mtime = cifs_NTtimeToUnix(fi->LastWriteTime); +} + void cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, struct cifs_sb_info *cifs_sb) { - memset(fattr, 0, sizeof(*fattr)); - fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes); - fattr->cf_eof = le64_to_cpu(info->EndOfFile); - fattr->cf_bytes = le64_to_cpu(info->AllocationSize); - fattr->cf_createtime = le64_to_cpu(info->CreationTime); - fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); - fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); - fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); + __dir_info_to_fattr(fattr, info); + cifs_fill_common_info(fattr, cifs_sb); +} +static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr, + SEARCH_ID_FULL_DIR_INFO *info, + struct cifs_sb_info *cifs_sb) +{ + __dir_info_to_fattr(fattr, info); + + /* See MS-FSCC 2.4.18 FileIdFullDirectoryInformation */ + if (fattr->cf_cifsattrs & ATTR_REPARSE) + fattr->cf_cifstag = le32_to_cpu(info->EaSize); cifs_fill_common_info(fattr, cifs_sb); } @@ -755,6 +795,11 @@ static int cifs_filldir(char *find_entry, struct file *file, (FIND_FILE_STANDARD_INFO *)find_entry, cifs_sb); break; + case SMB_FIND_FILE_ID_FULL_DIR_INFO: + cifs_fulldir_info_to_fattr(&fattr, + (SEARCH_ID_FULL_DIR_INFO *)find_entry, + cifs_sb); + break; default: cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *)find_entry, diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 8b0b512c5792..afe1f03aabe3 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -67,7 +67,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, goto out; - if (oparms->tcon->use_resilient) { + if (oparms->tcon->use_resilient) { /* default timeout is 0, servers pick default (120 seconds) */ nr_ioctl_req.Timeout = cpu_to_le32(oparms->tcon->handle_timeout); diff --git a/fs/direct-io.c b/fs/direct-io.c index 0ec4f270139f..00b4d15bb811 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -39,6 +39,8 @@ #include <linux/atomic.h> #include <linux/prefetch.h> +#include "internal.h" + /* * How many user pages to map in one call to get_user_pages(). This determines * the size of a structure in the slab cache diff --git a/fs/file.c b/fs/file.c index 2f4fcf985079..3da91a112bab 100644 --- a/fs/file.c +++ b/fs/file.c @@ -960,7 +960,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) return ksys_dup3(oldfd, newfd, 0); } -SYSCALL_DEFINE1(dup, unsigned int, fildes) +int ksys_dup(unsigned int fildes) { int ret = -EBADF; struct file *file = fget_raw(fildes); @@ -975,6 +975,11 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes) return ret; } +SYSCALL_DEFINE1(dup, unsigned int, fildes) +{ + return ksys_dup(fildes); +} + int f_dupfd(unsigned int from, struct file *file, unsigned flags) { int err; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a63d779eac10..ce715380143c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -882,6 +882,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) struct fuse_args_pages *ap = &ia->ap; loff_t pos = page_offset(ap->pages[0]); size_t count = ap->num_pages << PAGE_SHIFT; + ssize_t res; int err; ap->args.out_pages = true; @@ -896,7 +897,8 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) if (!err) return; } else { - err = fuse_simple_request(fc, &ap->args); + res = fuse_simple_request(fc, &ap->args); + err = res < 0 ? res : 0; } fuse_readpages_end(fc, &ap->args, err); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d5c2a3158610..a66e425884d1 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1498,8 +1498,10 @@ static int __init init_hugetlbfs_fs(void) /* other hstates are optional */ i = 0; for_each_hstate(h) { - if (i == default_hstate_idx) + if (i == default_hstate_idx) { + i++; continue; + } mnt = mount_one_hugetlbfs(h); if (IS_ERR(mnt)) diff --git a/fs/internal.h b/fs/internal.h index 4a7da1df573d..e3fa69544b66 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -38,7 +38,7 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) /* * buffer.c */ -extern void guard_bio_eod(int rw, struct bio *bio); +extern void guard_bio_eod(struct bio *bio); extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, get_block_t *get_block, struct iomap *iomap); diff --git a/fs/io-wq.c b/fs/io-wq.c index 11e80b7252a8..5147d2213b01 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -92,7 +92,6 @@ struct io_wqe { struct io_wqe_acct acct[2]; struct hlist_nulls_head free_list; - struct hlist_nulls_head busy_list; struct list_head all_list; struct io_wq *wq; @@ -327,7 +326,6 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, if (worker->flags & IO_WORKER_F_FREE) { worker->flags &= ~IO_WORKER_F_FREE; hlist_nulls_del_init_rcu(&worker->nulls_node); - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->busy_list); } /* @@ -365,7 +363,6 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) { if (!(worker->flags & IO_WORKER_F_FREE)) { worker->flags |= IO_WORKER_F_FREE; - hlist_nulls_del_init_rcu(&worker->nulls_node); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); } @@ -432,6 +429,8 @@ next: if (signal_pending(current)) flush_signals(current); + cond_resched(); + spin_lock_irq(&worker->lock); worker->cur_work = work; spin_unlock_irq(&worker->lock); @@ -446,10 +445,14 @@ next: task_unlock(current); } if ((work->flags & IO_WQ_WORK_NEEDS_USER) && !worker->mm && - wq->mm && mmget_not_zero(wq->mm)) { - use_mm(wq->mm); - set_fs(USER_DS); - worker->mm = wq->mm; + wq->mm) { + if (mmget_not_zero(wq->mm)) { + use_mm(wq->mm); + set_fs(USER_DS); + worker->mm = wq->mm; + } else { + work->flags |= IO_WQ_WORK_CANCEL; + } } if (!worker->creds) worker->creds = override_creds(wq->creds); @@ -798,10 +801,6 @@ void io_wq_cancel_all(struct io_wq *wq) set_bit(IO_WQ_BIT_CANCEL, &wq->state); - /* - * Browse both lists, as there's a gap between handing work off - * to a worker and the worker putting itself on the busy_list - */ rcu_read_lock(); for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; @@ -1049,7 +1048,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) spin_lock_init(&wqe->lock); INIT_WQ_LIST(&wqe->work_list); INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); - INIT_HLIST_NULLS_HEAD(&wqe->busy_list, 1); INIT_LIST_HEAD(&wqe->all_list); } diff --git a/fs/io_uring.c b/fs/io_uring.c index 6f084e3cf835..187dd94fd6b1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -330,6 +330,26 @@ struct io_timeout { struct file *file; u64 addr; int flags; + unsigned count; +}; + +struct io_rw { + /* NOTE: kiocb has the file as the first member, so don't do it here */ + struct kiocb kiocb; + u64 addr; + u64 len; +}; + +struct io_connect { + struct file *file; + struct sockaddr __user *addr; + int addr_len; +}; + +struct io_sr_msg { + struct file *file; + struct user_msghdr __user *msg; + int msg_flags; }; struct io_async_connect { @@ -351,7 +371,6 @@ struct io_async_rw { }; struct io_async_ctx { - struct io_uring_sqe sqe; union { struct io_async_rw rw; struct io_async_msghdr msg; @@ -369,15 +388,16 @@ struct io_async_ctx { struct io_kiocb { union { struct file *file; - struct kiocb rw; + struct io_rw rw; struct io_poll_iocb poll; struct io_accept accept; struct io_sync sync; struct io_cancel cancel; struct io_timeout timeout; + struct io_connect connect; + struct io_sr_msg sr_msg; }; - const struct io_uring_sqe *sqe; struct io_async_ctx *io; struct file *ring_file; int ring_fd; @@ -411,7 +431,6 @@ struct io_kiocb { #define REQ_F_INFLIGHT 16384 /* on inflight list */ #define REQ_F_COMP_LOCKED 32768 /* completion under lock */ #define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */ -#define REQ_F_PREPPED 131072 /* request already opcode prepared */ u64 user_data; u32 result; u32 sequence; @@ -609,33 +628,31 @@ static inline bool io_prep_async_work(struct io_kiocb *req, { bool do_hashed = false; - if (req->sqe) { - switch (req->opcode) { - case IORING_OP_WRITEV: - case IORING_OP_WRITE_FIXED: - /* only regular files should be hashed for writes */ - if (req->flags & REQ_F_ISREG) - do_hashed = true; - /* fall-through */ - case IORING_OP_READV: - case IORING_OP_READ_FIXED: - case IORING_OP_SENDMSG: - case IORING_OP_RECVMSG: - case IORING_OP_ACCEPT: - case IORING_OP_POLL_ADD: - case IORING_OP_CONNECT: - /* - * We know REQ_F_ISREG is not set on some of these - * opcodes, but this enables us to keep the check in - * just one place. - */ - if (!(req->flags & REQ_F_ISREG)) - req->work.flags |= IO_WQ_WORK_UNBOUND; - break; - } - if (io_req_needs_user(req)) - req->work.flags |= IO_WQ_WORK_NEEDS_USER; + switch (req->opcode) { + case IORING_OP_WRITEV: + case IORING_OP_WRITE_FIXED: + /* only regular files should be hashed for writes */ + if (req->flags & REQ_F_ISREG) + do_hashed = true; + /* fall-through */ + case IORING_OP_READV: + case IORING_OP_READ_FIXED: + case IORING_OP_SENDMSG: + case IORING_OP_RECVMSG: + case IORING_OP_ACCEPT: + case IORING_OP_POLL_ADD: + case IORING_OP_CONNECT: + /* + * We know REQ_F_ISREG is not set on some of these + * opcodes, but this enables us to keep the check in + * just one place. + */ + if (!(req->flags & REQ_F_ISREG)) + req->work.flags |= IO_WQ_WORK_UNBOUND; + break; } + if (io_req_needs_user(req)) + req->work.flags |= IO_WQ_WORK_NEEDS_USER; *link = io_prep_linked_timeout(req); return do_hashed; @@ -1180,7 +1197,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, ret = 0; list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) { - struct kiocb *kiocb = &req->rw; + struct kiocb *kiocb = &req->rw.kiocb; /* * Move completed entries to our local list. If we find a @@ -1335,7 +1352,7 @@ static inline void req_set_fail_links(struct io_kiocb *req) static void io_complete_rw_common(struct kiocb *kiocb, long res) { - struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); @@ -1347,7 +1364,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res) static void io_complete_rw(struct kiocb *kiocb, long res, long res2) { - struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); io_complete_rw_common(kiocb, res); io_put_req(req); @@ -1355,7 +1372,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res) { - struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); struct io_kiocb *nxt = NULL; io_complete_rw_common(kiocb, res); @@ -1366,7 +1383,7 @@ static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res) static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) { - struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); @@ -1400,7 +1417,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req) list_req = list_first_entry(&ctx->poll_list, struct io_kiocb, list); - if (list_req->rw.ki_filp != req->rw.ki_filp) + if (list_req->file != req->file) ctx->poll_multi_file = true; } @@ -1471,11 +1488,11 @@ static bool io_file_supports_async(struct file *file) return false; } -static int io_prep_rw(struct io_kiocb *req, bool force_nonblock) +static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) { - const struct io_uring_sqe *sqe = req->sqe; struct io_ring_ctx *ctx = req->ctx; - struct kiocb *kiocb = &req->rw; + struct kiocb *kiocb = &req->rw.kiocb; unsigned ioprio; int ret; @@ -1524,6 +1541,12 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock) return -EINVAL; kiocb->ki_complete = io_complete_rw; } + + req->rw.addr = READ_ONCE(sqe->addr); + req->rw.len = READ_ONCE(sqe->len); + /* we own ->private, reuse it for the buffer index */ + req->rw.kiocb.private = (void *) (unsigned long) + READ_ONCE(sqe->buf_index); return 0; } @@ -1557,11 +1580,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt, io_rw_done(kiocb, ret); } -static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw, - const struct io_uring_sqe *sqe, +static ssize_t io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) { - size_t len = READ_ONCE(sqe->len); + struct io_ring_ctx *ctx = req->ctx; + size_t len = req->rw.len; struct io_mapped_ubuf *imu; unsigned index, buf_index; size_t offset; @@ -1571,13 +1594,13 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw, if (unlikely(!ctx->user_bufs)) return -EFAULT; - buf_index = READ_ONCE(sqe->buf_index); + buf_index = (unsigned long) req->rw.kiocb.private; if (unlikely(buf_index >= ctx->nr_user_bufs)) return -EFAULT; index = array_index_nospec(buf_index, ctx->nr_user_bufs); imu = &ctx->user_bufs[index]; - buf_addr = READ_ONCE(sqe->addr); + buf_addr = req->rw.addr; /* overflow */ if (buf_addr + len < buf_addr) @@ -1634,25 +1657,20 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw, static ssize_t io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, struct iov_iter *iter) { - const struct io_uring_sqe *sqe = req->sqe; - void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); - size_t sqe_len = READ_ONCE(sqe->len); + void __user *buf = u64_to_user_ptr(req->rw.addr); + size_t sqe_len = req->rw.len; u8 opcode; - /* - * We're reading ->opcode for the second time, but the first read - * doesn't care whether it's _FIXED or not, so it doesn't matter - * whether ->opcode changes concurrently. The first read does care - * about whether it is a READ or a WRITE, so we don't trust this read - * for that purpose and instead let the caller pass in the read/write - * flag. - */ opcode = req->opcode; if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { *iovec = NULL; - return io_import_fixed(req->ctx, rw, sqe, iter); + return io_import_fixed(req, rw, iter); } + /* buffer index only valid with fixed read/write */ + if (req->rw.kiocb.private) + return -EINVAL; + if (req->io) { struct io_async_rw *iorw = &req->io->rw; @@ -1750,13 +1768,7 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size, static int io_alloc_async_ctx(struct io_kiocb *req) { req->io = kmalloc(sizeof(*req->io), GFP_KERNEL); - if (req->io) { - memcpy(&req->io->sqe, req->sqe, sizeof(req->io->sqe)); - req->sqe = &req->io->sqe; - return 0; - } - - return 1; + return req->io == NULL; } static void io_rw_async(struct io_wq_work **workptr) @@ -1774,6 +1786,9 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, struct iovec *iovec, struct iovec *fast_iov, struct iov_iter *iter) { + if (req->opcode == IORING_OP_READ_FIXED || + req->opcode == IORING_OP_WRITE_FIXED) + return 0; if (!req->io && io_alloc_async_ctx(req)) return -ENOMEM; @@ -1782,46 +1797,53 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, return 0; } -static int io_read_prep(struct io_kiocb *req, struct iovec **iovec, - struct iov_iter *iter, bool force_nonblock) +static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) { + struct io_async_ctx *io; + struct iov_iter iter; ssize_t ret; - ret = io_prep_rw(req, force_nonblock); + ret = io_prep_rw(req, sqe, force_nonblock); if (ret) return ret; if (unlikely(!(req->file->f_mode & FMODE_READ))) return -EBADF; - return io_import_iovec(READ, req, iovec, iter); + if (!req->io) + return 0; + + io = req->io; + io->rw.iov = io->rw.fast_iov; + req->io = NULL; + ret = io_import_iovec(READ, req, &io->rw.iov, &iter); + req->io = io; + if (ret < 0) + return ret; + + io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter); + return 0; } static int io_read(struct io_kiocb *req, struct io_kiocb **nxt, bool force_nonblock) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; - struct kiocb *kiocb = &req->rw; + struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter iter; - struct file *file; size_t iov_count; ssize_t io_size, ret; - if (!req->io) { - ret = io_read_prep(req, &iovec, &iter, force_nonblock); - if (ret < 0) - return ret; - } else { - ret = io_import_iovec(READ, req, &iovec, &iter); - if (ret < 0) - return ret; - } + ret = io_import_iovec(READ, req, &iovec, &iter); + if (ret < 0) + return ret; /* Ensure we clear previously set non-block flag */ if (!force_nonblock) - req->rw.ki_flags &= ~IOCB_NOWAIT; + req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT; - file = req->file; + req->result = 0; io_size = ret; if (req->flags & REQ_F_LINK) req->result = io_size; @@ -1830,33 +1852,21 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt, * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so * we know to async punt it even if it was opened O_NONBLOCK */ - if (force_nonblock && !io_file_supports_async(file)) { + if (force_nonblock && !io_file_supports_async(req->file)) { req->flags |= REQ_F_MUST_PUNT; goto copy_iov; } iov_count = iov_iter_count(&iter); - ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count); + ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count); if (!ret) { ssize_t ret2; - if (file->f_op->read_iter) - ret2 = call_read_iter(file, kiocb, &iter); + if (req->file->f_op->read_iter) + ret2 = call_read_iter(req->file, kiocb, &iter); else - ret2 = loop_rw_iter(READ, file, kiocb, &iter); + ret2 = loop_rw_iter(READ, req->file, kiocb, &iter); - /* - * In case of a short read, punt to async. This can happen - * if we have data partially cached. Alternatively we can - * return the short read, in which case the application will - * need to issue another SQE and wait for it. That SQE will - * need async punt anyway, so it's more efficient to do it - * here. - */ - if (force_nonblock && !(req->flags & REQ_F_NOWAIT) && - (req->flags & REQ_F_ISREG) && - ret2 > 0 && ret2 < io_size) - ret2 = -EAGAIN; /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || ret2 != -EAGAIN) { kiocb_done(kiocb, ret2, nxt, req->in_async); @@ -1875,46 +1885,53 @@ out_free: return ret; } -static int io_write_prep(struct io_kiocb *req, struct iovec **iovec, - struct iov_iter *iter, bool force_nonblock) +static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) { + struct io_async_ctx *io; + struct iov_iter iter; ssize_t ret; - ret = io_prep_rw(req, force_nonblock); + ret = io_prep_rw(req, sqe, force_nonblock); if (ret) return ret; if (unlikely(!(req->file->f_mode & FMODE_WRITE))) return -EBADF; - return io_import_iovec(WRITE, req, iovec, iter); + if (!req->io) + return 0; + + io = req->io; + io->rw.iov = io->rw.fast_iov; + req->io = NULL; + ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter); + req->io = io; + if (ret < 0) + return ret; + + io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter); + return 0; } static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, bool force_nonblock) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; - struct kiocb *kiocb = &req->rw; + struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter iter; - struct file *file; size_t iov_count; ssize_t ret, io_size; - if (!req->io) { - ret = io_write_prep(req, &iovec, &iter, force_nonblock); - if (ret < 0) - return ret; - } else { - ret = io_import_iovec(WRITE, req, &iovec, &iter); - if (ret < 0) - return ret; - } + ret = io_import_iovec(WRITE, req, &iovec, &iter); + if (ret < 0) + return ret; /* Ensure we clear previously set non-block flag */ if (!force_nonblock) - req->rw.ki_flags &= ~IOCB_NOWAIT; + req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT; - file = kiocb->ki_filp; + req->result = 0; io_size = ret; if (req->flags & REQ_F_LINK) req->result = io_size; @@ -1934,7 +1951,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, goto copy_iov; iov_count = iov_iter_count(&iter); - ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count); + ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count); if (!ret) { ssize_t ret2; @@ -1946,17 +1963,17 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, * we return to userspace. */ if (req->flags & REQ_F_ISREG) { - __sb_start_write(file_inode(file)->i_sb, + __sb_start_write(file_inode(req->file)->i_sb, SB_FREEZE_WRITE, true); - __sb_writers_release(file_inode(file)->i_sb, + __sb_writers_release(file_inode(req->file)->i_sb, SB_FREEZE_WRITE); } kiocb->ki_flags |= IOCB_WRITE; - if (file->f_op->write_iter) - ret2 = call_write_iter(file, kiocb, &iter); + if (req->file->f_op->write_iter) + ret2 = call_write_iter(req->file, kiocb, &iter); else - ret2 = loop_rw_iter(WRITE, file, kiocb, &iter); + ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter); if (!force_nonblock || ret2 != -EAGAIN) { kiocb_done(kiocb, ret2, nxt, req->in_async); } else { @@ -1989,13 +2006,10 @@ static int io_nop(struct io_kiocb *req) return 0; } -static int io_prep_fsync(struct io_kiocb *req) +static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - const struct io_uring_sqe *sqe = req->sqe; struct io_ring_ctx *ctx = req->ctx; - if (req->flags & REQ_F_PREPPED) - return 0; if (!req->file) return -EBADF; @@ -2010,7 +2024,6 @@ static int io_prep_fsync(struct io_kiocb *req) req->sync.off = READ_ONCE(sqe->off); req->sync.len = READ_ONCE(sqe->len); - req->flags |= REQ_F_PREPPED; return 0; } @@ -2026,6 +2039,28 @@ static bool io_req_cancelled(struct io_kiocb *req) return false; } +static void io_link_work_cb(struct io_wq_work **workptr) +{ + struct io_wq_work *work = *workptr; + struct io_kiocb *link = work->data; + + io_queue_linked_timeout(link); + work->func = io_wq_submit_work; +} + +static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) +{ + struct io_kiocb *link; + + io_prep_async_work(nxt, &link); + *workptr = &nxt->work; + if (link) { + nxt->work.flags |= IO_WQ_WORK_CB; + nxt->work.func = io_link_work_cb; + nxt->work.data = link; + } +} + static void io_fsync_finish(struct io_wq_work **workptr) { struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); @@ -2036,7 +2071,7 @@ static void io_fsync_finish(struct io_wq_work **workptr) if (io_req_cancelled(req)) return; - ret = vfs_fsync_range(req->rw.ki_filp, req->sync.off, + ret = vfs_fsync_range(req->file, req->sync.off, end > 0 ? end : LLONG_MAX, req->sync.flags & IORING_FSYNC_DATASYNC); if (ret < 0) @@ -2044,18 +2079,13 @@ static void io_fsync_finish(struct io_wq_work **workptr) io_cqring_add_event(req, ret); io_put_req_find_next(req, &nxt); if (nxt) - *workptr = &nxt->work; + io_wq_assign_next(workptr, nxt); } static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt, bool force_nonblock) { struct io_wq_work *work, *old_work; - int ret; - - ret = io_prep_fsync(req); - if (ret) - return ret; /* fsync always requires a blocking context */ if (force_nonblock) { @@ -2071,13 +2101,10 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt, return 0; } -static int io_prep_sfr(struct io_kiocb *req) +static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - const struct io_uring_sqe *sqe = req->sqe; struct io_ring_ctx *ctx = req->ctx; - if (req->flags & REQ_F_PREPPED) - return 0; if (!req->file) return -EBADF; @@ -2089,7 +2116,6 @@ static int io_prep_sfr(struct io_kiocb *req) req->sync.off = READ_ONCE(sqe->off); req->sync.len = READ_ONCE(sqe->len); req->sync.flags = READ_ONCE(sqe->sync_range_flags); - req->flags |= REQ_F_PREPPED; return 0; } @@ -2102,25 +2128,20 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr) if (io_req_cancelled(req)) return; - ret = sync_file_range(req->rw.ki_filp, req->sync.off, req->sync.len, + ret = sync_file_range(req->file, req->sync.off, req->sync.len, req->sync.flags); if (ret < 0) req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req_find_next(req, &nxt); if (nxt) - *workptr = &nxt->work; + io_wq_assign_next(workptr, nxt); } static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt, bool force_nonblock) { struct io_wq_work *work, *old_work; - int ret; - - ret = io_prep_sfr(req); - if (ret) - return ret; /* sync_file_range always requires a blocking context */ if (force_nonblock) { @@ -2149,19 +2170,23 @@ static void io_sendrecv_async(struct io_wq_work **workptr) } #endif -static int io_sendmsg_prep(struct io_kiocb *req, struct io_async_ctx *io) +static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { #if defined(CONFIG_NET) - const struct io_uring_sqe *sqe = req->sqe; - struct user_msghdr __user *msg; - unsigned flags; + struct io_sr_msg *sr = &req->sr_msg; + struct io_async_ctx *io = req->io; + + sr->msg_flags = READ_ONCE(sqe->msg_flags); + sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + + if (!io) + return 0; - flags = READ_ONCE(sqe->msg_flags); - msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr); io->msg.iov = io->msg.fast_iov; - return sendmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.iov); + return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags, + &io->msg.iov); #else - return 0; + return -EOPNOTSUPP; #endif } @@ -2169,7 +2194,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, bool force_nonblock) { #if defined(CONFIG_NET) - const struct io_uring_sqe *sqe = req->sqe; struct io_async_msghdr *kmsg = NULL; struct socket *sock; int ret; @@ -2183,12 +2207,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, struct sockaddr_storage addr; unsigned flags; - flags = READ_ONCE(sqe->msg_flags); - if (flags & MSG_DONTWAIT) - req->flags |= REQ_F_NOWAIT; - else if (force_nonblock) - flags |= MSG_DONTWAIT; - if (req->io) { kmsg = &req->io->msg; kmsg->msg.msg_name = &addr; @@ -2197,13 +2215,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, kmsg->iov = kmsg->fast_iov; kmsg->msg.msg_iter.iov = kmsg->iov; } else { + struct io_sr_msg *sr = &req->sr_msg; + kmsg = &io.msg; kmsg->msg.msg_name = &addr; - ret = io_sendmsg_prep(req, &io); + + io.msg.iov = io.msg.fast_iov; + ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg, + sr->msg_flags, &io.msg.iov); if (ret) - goto out; + return ret; } + flags = req->sr_msg.msg_flags; + if (flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + else if (force_nonblock) + flags |= MSG_DONTWAIT; + ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); if (force_nonblock && ret == -EAGAIN) { if (req->io) @@ -2218,7 +2247,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, ret = -EINTR; } -out: if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov) kfree(kmsg->iov); io_cqring_add_event(req, ret); @@ -2231,20 +2259,24 @@ out: #endif } -static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io) +static int io_recvmsg_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) { #if defined(CONFIG_NET) - const struct io_uring_sqe *sqe = req->sqe; - struct user_msghdr __user *msg; - unsigned flags; + struct io_sr_msg *sr = &req->sr_msg; + struct io_async_ctx *io = req->io; + + sr->msg_flags = READ_ONCE(sqe->msg_flags); + sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + + if (!io) + return 0; - flags = READ_ONCE(sqe->msg_flags); - msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr); io->msg.iov = io->msg.fast_iov; - return recvmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.uaddr, - &io->msg.iov); + return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags, + &io->msg.uaddr, &io->msg.iov); #else - return 0; + return -EOPNOTSUPP; #endif } @@ -2252,7 +2284,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, bool force_nonblock) { #if defined(CONFIG_NET) - const struct io_uring_sqe *sqe = req->sqe; struct io_async_msghdr *kmsg = NULL; struct socket *sock; int ret; @@ -2262,19 +2293,10 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, sock = sock_from_file(req->file, &ret); if (sock) { - struct user_msghdr __user *msg; struct io_async_ctx io; struct sockaddr_storage addr; unsigned flags; - flags = READ_ONCE(sqe->msg_flags); - if (flags & MSG_DONTWAIT) - req->flags |= REQ_F_NOWAIT; - else if (force_nonblock) - flags |= MSG_DONTWAIT; - - msg = (struct user_msghdr __user *) (unsigned long) - READ_ONCE(sqe->addr); if (req->io) { kmsg = &req->io->msg; kmsg->msg.msg_name = &addr; @@ -2283,14 +2305,27 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, kmsg->iov = kmsg->fast_iov; kmsg->msg.msg_iter.iov = kmsg->iov; } else { + struct io_sr_msg *sr = &req->sr_msg; + kmsg = &io.msg; kmsg->msg.msg_name = &addr; - ret = io_recvmsg_prep(req, &io); + + io.msg.iov = io.msg.fast_iov; + ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg, + sr->msg_flags, &io.msg.uaddr, + &io.msg.iov); if (ret) - goto out; + return ret; } - ret = __sys_recvmsg_sock(sock, &kmsg->msg, msg, kmsg->uaddr, flags); + flags = req->sr_msg.msg_flags; + if (flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + else if (force_nonblock) + flags |= MSG_DONTWAIT; + + ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg, + kmsg->uaddr, flags); if (force_nonblock && ret == -EAGAIN) { if (req->io) return -EAGAIN; @@ -2304,7 +2339,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, ret = -EINTR; } -out: if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov) kfree(kmsg->iov); io_cqring_add_event(req, ret); @@ -2317,25 +2351,19 @@ out: #endif } -static int io_accept_prep(struct io_kiocb *req) +static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { #if defined(CONFIG_NET) - const struct io_uring_sqe *sqe = req->sqe; struct io_accept *accept = &req->accept; - if (req->flags & REQ_F_PREPPED) - return 0; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) return -EINVAL; if (sqe->ioprio || sqe->len || sqe->buf_index) return -EINVAL; - accept->addr = (struct sockaddr __user *) - (unsigned long) READ_ONCE(sqe->addr); - accept->addr_len = (int __user *) (unsigned long) READ_ONCE(sqe->addr2); + accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); + accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); accept->flags = READ_ONCE(sqe->accept_flags); - req->flags |= REQ_F_PREPPED; return 0; #else return -EOPNOTSUPP; @@ -2373,7 +2401,7 @@ static void io_accept_finish(struct io_wq_work **workptr) return; __io_accept(req, &nxt, false); if (nxt) - *workptr = &nxt->work; + io_wq_assign_next(workptr, nxt); } #endif @@ -2383,10 +2411,6 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt, #if defined(CONFIG_NET) int ret; - ret = io_accept_prep(req); - if (ret) - return ret; - ret = __io_accept(req, nxt, force_nonblock); if (ret == -EAGAIN && force_nonblock) { req->work.func = io_accept_finish; @@ -2400,18 +2424,27 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt, #endif } -static int io_connect_prep(struct io_kiocb *req, struct io_async_ctx *io) +static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { #if defined(CONFIG_NET) - const struct io_uring_sqe *sqe = req->sqe; - struct sockaddr __user *addr; - int addr_len; + struct io_connect *conn = &req->connect; + struct io_async_ctx *io = req->io; - addr = (struct sockaddr __user *) (unsigned long) READ_ONCE(sqe->addr); - addr_len = READ_ONCE(sqe->addr2); - return move_addr_to_kernel(addr, addr_len, &io->connect.address); + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + return -EINVAL; + if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags) + return -EINVAL; + + conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); + conn->addr_len = READ_ONCE(sqe->addr2); + + if (!io) + return 0; + + return move_addr_to_kernel(conn->addr, conn->addr_len, + &io->connect.address); #else - return 0; + return -EOPNOTSUPP; #endif } @@ -2419,30 +2452,25 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt, bool force_nonblock) { #if defined(CONFIG_NET) - const struct io_uring_sqe *sqe = req->sqe; struct io_async_ctx __io, *io; unsigned file_flags; - int addr_len, ret; - - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) - return -EINVAL; - if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags) - return -EINVAL; - - addr_len = READ_ONCE(sqe->addr2); - file_flags = force_nonblock ? O_NONBLOCK : 0; + int ret; if (req->io) { io = req->io; } else { - ret = io_connect_prep(req, &__io); + ret = move_addr_to_kernel(req->connect.addr, + req->connect.addr_len, + &__io.connect.address); if (ret) goto out; io = &__io; } - ret = __sys_connect_file(req->file, &io->connect.address, addr_len, - file_flags); + file_flags = force_nonblock ? O_NONBLOCK : 0; + + ret = __sys_connect_file(req->file, &io->connect.address, + req->connect.addr_len, file_flags); if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { if (req->io) return -EAGAIN; @@ -2513,12 +2541,9 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr) return -ENOENT; } -static int io_poll_remove_prep(struct io_kiocb *req) +static int io_poll_remove_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) { - const struct io_uring_sqe *sqe = req->sqe; - - if (req->flags & REQ_F_PREPPED) - return 0; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || @@ -2526,7 +2551,6 @@ static int io_poll_remove_prep(struct io_kiocb *req) return -EINVAL; req->poll.addr = READ_ONCE(sqe->addr); - req->flags |= REQ_F_PREPPED; return 0; } @@ -2540,10 +2564,6 @@ static int io_poll_remove(struct io_kiocb *req) u64 addr; int ret; - ret = io_poll_remove_prep(req); - if (ret) - return ret; - addr = req->poll.addr; spin_lock_irq(&ctx->completion_lock); ret = io_poll_cancel(ctx, addr); @@ -2612,7 +2632,7 @@ static void io_poll_complete_work(struct io_wq_work **workptr) req_set_fail_links(req); io_put_req_find_next(req, &nxt); if (nxt) - *workptr = &nxt->work; + io_wq_assign_next(workptr, nxt); } static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, @@ -2681,14 +2701,11 @@ static void io_poll_req_insert(struct io_kiocb *req) hlist_add_head(&req->hash_node, list); } -static int io_poll_add_prep(struct io_kiocb *req) +static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - const struct io_uring_sqe *sqe = req->sqe; struct io_poll_iocb *poll = &req->poll; u16 events; - if (req->flags & REQ_F_PREPPED) - return 0; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index) @@ -2696,7 +2713,6 @@ static int io_poll_add_prep(struct io_kiocb *req) if (!poll->file) return -EBADF; - req->flags |= REQ_F_PREPPED; events = READ_ONCE(sqe->poll_events); poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; return 0; @@ -2709,11 +2725,6 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt) struct io_poll_table ipt; bool cancel = false; __poll_t mask; - int ret; - - ret = io_poll_add_prep(req); - if (ret) - return ret; INIT_IO_WORK(&req->work, io_poll_complete_work); INIT_HLIST_NODE(&req->hash_node); @@ -2832,12 +2843,9 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) return 0; } -static int io_timeout_remove_prep(struct io_kiocb *req) +static int io_timeout_remove_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) { - const struct io_uring_sqe *sqe = req->sqe; - - if (req->flags & REQ_F_PREPPED) - return 0; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len) @@ -2848,7 +2856,6 @@ static int io_timeout_remove_prep(struct io_kiocb *req) if (req->timeout.flags) return -EINVAL; - req->flags |= REQ_F_PREPPED; return 0; } @@ -2860,10 +2867,6 @@ static int io_timeout_remove(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; int ret; - ret = io_timeout_remove_prep(req); - if (ret) - return ret; - spin_lock_irq(&ctx->completion_lock); ret = io_timeout_cancel(ctx, req->timeout.addr); @@ -2877,10 +2880,9 @@ static int io_timeout_remove(struct io_kiocb *req) return 0; } -static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io, +static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, bool is_timeout_link) { - const struct io_uring_sqe *sqe = req->sqe; struct io_timeout_data *data; unsigned flags; @@ -2894,7 +2896,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io, if (flags & ~IORING_TIMEOUT_ABS) return -EINVAL; - data = &io->timeout; + req->timeout.count = READ_ONCE(sqe->off); + + if (!req->io && io_alloc_async_ctx(req)) + return -ENOMEM; + + data = &req->io->timeout; data->req = req; req->flags |= REQ_F_TIMEOUT; @@ -2912,21 +2919,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io, static int io_timeout(struct io_kiocb *req) { - const struct io_uring_sqe *sqe = req->sqe; unsigned count; struct io_ring_ctx *ctx = req->ctx; struct io_timeout_data *data; struct list_head *entry; unsigned span = 0; - int ret; - if (!req->io) { - if (io_alloc_async_ctx(req)) - return -ENOMEM; - ret = io_timeout_prep(req, req->io, false); - if (ret) - return ret; - } data = &req->io->timeout; /* @@ -2934,7 +2932,7 @@ static int io_timeout(struct io_kiocb *req) * timeout event to be satisfied. If it isn't set, then this is * a pure timeout request, sequence isn't used. */ - count = READ_ONCE(sqe->off); + count = req->timeout.count; if (!count) { req->flags |= REQ_F_TIMEOUT_NOSEQ; spin_lock_irq(&ctx->completion_lock); @@ -3052,19 +3050,15 @@ done: io_put_req_find_next(req, nxt); } -static int io_async_cancel_prep(struct io_kiocb *req) +static int io_async_cancel_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) { - const struct io_uring_sqe *sqe = req->sqe; - - if (req->flags & REQ_F_PREPPED) - return 0; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->flags || sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags) return -EINVAL; - req->flags |= REQ_F_PREPPED; req->cancel.addr = READ_ONCE(sqe->addr); return 0; } @@ -3072,21 +3066,14 @@ static int io_async_cancel_prep(struct io_kiocb *req) static int io_async_cancel(struct io_kiocb *req, struct io_kiocb **nxt) { struct io_ring_ctx *ctx = req->ctx; - int ret; - - ret = io_async_cancel_prep(req); - if (ret) - return ret; io_async_find_and_cancel(ctx, req, req->cancel.addr, nxt, 0); return 0; } -static int io_req_defer_prep(struct io_kiocb *req) +static int io_req_defer_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) { - struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; - struct io_async_ctx *io = req->io; - struct iov_iter iter; ssize_t ret = 0; switch (req->opcode) { @@ -3094,61 +3081,47 @@ static int io_req_defer_prep(struct io_kiocb *req) break; case IORING_OP_READV: case IORING_OP_READ_FIXED: - /* ensure prep does right import */ - req->io = NULL; - ret = io_read_prep(req, &iovec, &iter, true); - req->io = io; - if (ret < 0) - break; - io_req_map_rw(req, ret, iovec, inline_vecs, &iter); - ret = 0; + ret = io_read_prep(req, sqe, true); break; case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: - /* ensure prep does right import */ - req->io = NULL; - ret = io_write_prep(req, &iovec, &iter, true); - req->io = io; - if (ret < 0) - break; - io_req_map_rw(req, ret, iovec, inline_vecs, &iter); - ret = 0; + ret = io_write_prep(req, sqe, true); break; case IORING_OP_POLL_ADD: - ret = io_poll_add_prep(req); + ret = io_poll_add_prep(req, sqe); break; case IORING_OP_POLL_REMOVE: - ret = io_poll_remove_prep(req); + ret = io_poll_remove_prep(req, sqe); break; case IORING_OP_FSYNC: - ret = io_prep_fsync(req); + ret = io_prep_fsync(req, sqe); break; case IORING_OP_SYNC_FILE_RANGE: - ret = io_prep_sfr(req); + ret = io_prep_sfr(req, sqe); break; case IORING_OP_SENDMSG: - ret = io_sendmsg_prep(req, io); + ret = io_sendmsg_prep(req, sqe); break; case IORING_OP_RECVMSG: - ret = io_recvmsg_prep(req, io); + ret = io_recvmsg_prep(req, sqe); break; case IORING_OP_CONNECT: - ret = io_connect_prep(req, io); + ret = io_connect_prep(req, sqe); break; case IORING_OP_TIMEOUT: - ret = io_timeout_prep(req, io, false); + ret = io_timeout_prep(req, sqe, false); break; case IORING_OP_TIMEOUT_REMOVE: - ret = io_timeout_remove_prep(req); + ret = io_timeout_remove_prep(req, sqe); break; case IORING_OP_ASYNC_CANCEL: - ret = io_async_cancel_prep(req); + ret = io_async_cancel_prep(req, sqe); break; case IORING_OP_LINK_TIMEOUT: - ret = io_timeout_prep(req, io, true); + ret = io_timeout_prep(req, sqe, true); break; case IORING_OP_ACCEPT: - ret = io_accept_prep(req); + ret = io_accept_prep(req, sqe); break; default: printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", @@ -3160,7 +3133,7 @@ static int io_req_defer_prep(struct io_kiocb *req) return ret; } -static int io_req_defer(struct io_kiocb *req) +static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; int ret; @@ -3169,10 +3142,10 @@ static int io_req_defer(struct io_kiocb *req) if (!req_need_defer(req) && list_empty(&ctx->defer_list)) return 0; - if (io_alloc_async_ctx(req)) + if (!req->io && io_alloc_async_ctx(req)) return -EAGAIN; - ret = io_req_defer_prep(req); + ret = io_req_defer_prep(req, sqe); if (ret < 0) return ret; @@ -3188,9 +3161,8 @@ static int io_req_defer(struct io_kiocb *req) return -EIOCBQUEUED; } -__attribute__((nonnull)) -static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt, - bool force_nonblock) +static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, + struct io_kiocb **nxt, bool force_nonblock) { struct io_ring_ctx *ctx = req->ctx; int ret; @@ -3200,52 +3172,109 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt, ret = io_nop(req); break; case IORING_OP_READV: - if (unlikely(req->sqe->buf_index)) - return -EINVAL; - ret = io_read(req, nxt, force_nonblock); - break; - case IORING_OP_WRITEV: - if (unlikely(req->sqe->buf_index)) - return -EINVAL; - ret = io_write(req, nxt, force_nonblock); - break; case IORING_OP_READ_FIXED: + if (sqe) { + ret = io_read_prep(req, sqe, force_nonblock); + if (ret < 0) + break; + } ret = io_read(req, nxt, force_nonblock); break; + case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: + if (sqe) { + ret = io_write_prep(req, sqe, force_nonblock); + if (ret < 0) + break; + } ret = io_write(req, nxt, force_nonblock); break; case IORING_OP_FSYNC: + if (sqe) { + ret = io_prep_fsync(req, sqe); + if (ret < 0) + break; + } ret = io_fsync(req, nxt, force_nonblock); break; case IORING_OP_POLL_ADD: + if (sqe) { + ret = io_poll_add_prep(req, sqe); + if (ret) + break; + } ret = io_poll_add(req, nxt); break; case IORING_OP_POLL_REMOVE: + if (sqe) { + ret = io_poll_remove_prep(req, sqe); + if (ret < 0) + break; + } ret = io_poll_remove(req); break; case IORING_OP_SYNC_FILE_RANGE: + if (sqe) { + ret = io_prep_sfr(req, sqe); + if (ret < 0) + break; + } ret = io_sync_file_range(req, nxt, force_nonblock); break; case IORING_OP_SENDMSG: + if (sqe) { + ret = io_sendmsg_prep(req, sqe); + if (ret < 0) + break; + } ret = io_sendmsg(req, nxt, force_nonblock); break; case IORING_OP_RECVMSG: + if (sqe) { + ret = io_recvmsg_prep(req, sqe); + if (ret) + break; + } ret = io_recvmsg(req, nxt, force_nonblock); break; case IORING_OP_TIMEOUT: + if (sqe) { + ret = io_timeout_prep(req, sqe, false); + if (ret) + break; + } ret = io_timeout(req); break; case IORING_OP_TIMEOUT_REMOVE: + if (sqe) { + ret = io_timeout_remove_prep(req, sqe); + if (ret) + break; + } ret = io_timeout_remove(req); break; case IORING_OP_ACCEPT: + if (sqe) { + ret = io_accept_prep(req, sqe); + if (ret) + break; + } ret = io_accept(req, nxt, force_nonblock); break; case IORING_OP_CONNECT: + if (sqe) { + ret = io_connect_prep(req, sqe); + if (ret) + break; + } ret = io_connect(req, nxt, force_nonblock); break; case IORING_OP_ASYNC_CANCEL: + if (sqe) { + ret = io_async_cancel_prep(req, sqe); + if (ret) + break; + } ret = io_async_cancel(req, nxt); break; default: @@ -3257,24 +3286,24 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt, return ret; if (ctx->flags & IORING_SETUP_IOPOLL) { + const bool in_async = io_wq_current_is_worker(); + if (req->result == -EAGAIN) return -EAGAIN; + /* workqueue context doesn't hold uring_lock, grab it now */ + if (in_async) + mutex_lock(&ctx->uring_lock); + io_iopoll_req_issued(req); + + if (in_async) + mutex_unlock(&ctx->uring_lock); } return 0; } -static void io_link_work_cb(struct io_wq_work **workptr) -{ - struct io_wq_work *work = *workptr; - struct io_kiocb *link = work->data; - - io_queue_linked_timeout(link); - work->func = io_wq_submit_work; -} - static void io_wq_submit_work(struct io_wq_work **workptr) { struct io_wq_work *work = *workptr; @@ -3289,7 +3318,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr) req->has_user = (work->flags & IO_WQ_WORK_HAS_MM) != 0; req->in_async = true; do { - ret = io_issue_sqe(req, &nxt, false); + ret = io_issue_sqe(req, NULL, &nxt, false); /* * We can get EAGAIN for polled IO even though we're * forcing a sync submission from here, since we can't @@ -3311,17 +3340,8 @@ static void io_wq_submit_work(struct io_wq_work **workptr) } /* if a dependent link is ready, pass it back */ - if (!ret && nxt) { - struct io_kiocb *link; - - io_prep_async_work(nxt, &link); - *workptr = &nxt->work; - if (link) { - nxt->work.flags |= IO_WQ_WORK_CB; - nxt->work.func = io_link_work_cb; - nxt->work.data = link; - } - } + if (!ret && nxt) + io_wq_assign_next(workptr, nxt); } static bool io_req_op_valid(int op) @@ -3355,14 +3375,15 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, return table->files[index & IORING_FILE_TABLE_MASK]; } -static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req) +static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, + const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; unsigned flags; int fd, ret; - flags = READ_ONCE(req->sqe->flags); - fd = READ_ONCE(req->sqe->fd); + flags = READ_ONCE(sqe->flags); + fd = READ_ONCE(sqe->fd); if (flags & IOSQE_IO_DRAIN) req->flags |= REQ_F_IO_DRAIN; @@ -3494,7 +3515,7 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) return nxt; } -static void __io_queue_sqe(struct io_kiocb *req) +static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_kiocb *linked_timeout; struct io_kiocb *nxt = NULL; @@ -3503,7 +3524,7 @@ static void __io_queue_sqe(struct io_kiocb *req) again: linked_timeout = io_prep_linked_timeout(req); - ret = io_issue_sqe(req, &nxt, true); + ret = io_issue_sqe(req, sqe, &nxt, true); /* * We async punt it if the file wasn't marked NOWAIT, or if the file @@ -3550,7 +3571,7 @@ done_req: } } -static void io_queue_sqe(struct io_kiocb *req) +static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) { int ret; @@ -3560,7 +3581,7 @@ static void io_queue_sqe(struct io_kiocb *req) } req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK); - ret = io_req_defer(req); + ret = io_req_defer(req, sqe); if (ret) { if (ret != -EIOCBQUEUED) { io_cqring_add_event(req, ret); @@ -3568,7 +3589,7 @@ static void io_queue_sqe(struct io_kiocb *req) io_double_put_req(req); } } else - __io_queue_sqe(req); + __io_queue_sqe(req, sqe); } static inline void io_queue_link_head(struct io_kiocb *req) @@ -3577,25 +3598,25 @@ static inline void io_queue_link_head(struct io_kiocb *req) io_cqring_add_event(req, -ECANCELED); io_double_put_req(req); } else - io_queue_sqe(req); + io_queue_sqe(req, NULL); } #define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ IOSQE_IO_HARDLINK) -static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state, - struct io_kiocb **link) +static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, + struct io_submit_state *state, struct io_kiocb **link) { struct io_ring_ctx *ctx = req->ctx; int ret; /* enforce forwards compatibility on users */ - if (unlikely(req->sqe->flags & ~SQE_VALID_FLAGS)) { + if (unlikely(sqe->flags & ~SQE_VALID_FLAGS)) { ret = -EINVAL; goto err_req; } - ret = io_req_set_file(state, req); + ret = io_req_set_file(state, req, sqe); if (unlikely(ret)) { err_req: io_cqring_add_event(req, ret); @@ -3613,10 +3634,10 @@ err_req: if (*link) { struct io_kiocb *prev = *link; - if (req->sqe->flags & IOSQE_IO_DRAIN) + if (sqe->flags & IOSQE_IO_DRAIN) (*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN; - if (req->sqe->flags & IOSQE_IO_HARDLINK) + if (sqe->flags & IOSQE_IO_HARDLINK) req->flags |= REQ_F_HARDLINK; if (io_alloc_async_ctx(req)) { @@ -3624,7 +3645,7 @@ err_req: goto err_req; } - ret = io_req_defer_prep(req); + ret = io_req_defer_prep(req, sqe); if (ret) { /* fail even hard links since we don't submit */ prev->flags |= REQ_F_FAIL_LINK; @@ -3632,15 +3653,18 @@ err_req: } trace_io_uring_link(ctx, req, prev); list_add_tail(&req->link_list, &prev->link_list); - } else if (req->sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) { + } else if (sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) { req->flags |= REQ_F_LINK; - if (req->sqe->flags & IOSQE_IO_HARDLINK) + if (sqe->flags & IOSQE_IO_HARDLINK) req->flags |= REQ_F_HARDLINK; INIT_LIST_HEAD(&req->link_list); + ret = io_req_defer_prep(req, sqe); + if (ret) + req->flags |= REQ_F_FAIL_LINK; *link = req; } else { - io_queue_sqe(req); + io_queue_sqe(req, sqe); } return true; @@ -3685,14 +3709,15 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) } /* - * Fetch an sqe, if one is available. Note that req->sqe will point to memory + * Fetch an sqe, if one is available. Note that sqe_ptr will point to memory * that is mapped by userspace. This means that care needs to be taken to * ensure that reads are stable, as we cannot rely on userspace always * being a good citizen. If members of the sqe are validated and then later * used, it's important that those reads are done through READ_ONCE() to * prevent a re-load down the line. */ -static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req) +static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req, + const struct io_uring_sqe **sqe_ptr) { struct io_rings *rings = ctx->rings; u32 *sq_array = ctx->sq_array; @@ -3719,9 +3744,9 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req) * link list. */ req->sequence = ctx->cached_sq_head; - req->sqe = &ctx->sq_sqes[head]; - req->opcode = READ_ONCE(req->sqe->opcode); - req->user_data = READ_ONCE(req->sqe->user_data); + *sqe_ptr = &ctx->sq_sqes[head]; + req->opcode = READ_ONCE((*sqe_ptr)->opcode); + req->user_data = READ_ONCE((*sqe_ptr)->user_data); ctx->cached_sq_head++; return true; } @@ -3753,6 +3778,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, } for (i = 0; i < nr; i++) { + const struct io_uring_sqe *sqe; struct io_kiocb *req; unsigned int sqe_flags; @@ -3762,7 +3788,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, submitted = -EAGAIN; break; } - if (!io_get_sqring(ctx, req)) { + if (!io_get_sqring(ctx, req, &sqe)) { __io_free_req(req); break; } @@ -3776,7 +3802,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, } submitted++; - sqe_flags = req->sqe->flags; + sqe_flags = sqe->flags; req->ring_file = ring_file; req->ring_fd = ring_fd; @@ -3784,7 +3810,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, req->in_async = async; req->needs_fixed_file = async; trace_io_uring_submit_sqe(ctx, req->user_data, true, async); - if (!io_submit_sqe(req, statep, &link)) + if (!io_submit_sqe(req, sqe, statep, &link)) break; /* * If previous wasn't linked and we have a linked command, @@ -4702,7 +4728,7 @@ static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst, if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov))) return -EFAULT; - dst->iov_base = (void __user *) (unsigned long) ciov.iov_base; + dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base); dst->iov_len = ciov.iov_len; return 0; } @@ -5133,6 +5159,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, } else if (to_submit) { struct mm_struct *cur_mm; + if (current->mm != ctx->sqo_mm || + current_cred() != ctx->creds) { + ret = -EPERM; + goto out; + } + to_submit = min(to_submit, ctx->sq_entries); mutex_lock(&ctx->uring_lock); /* already have mm, so io_submit_sqes() won't try to grab it */ diff --git a/fs/locks.c b/fs/locks.c index 6970f55daf54..44b6da032842 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2853,7 +2853,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, } if (inode) { /* userspace relies on this representation of dev_t */ - seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, + seq_printf(f, "%d %02x:%02x:%lu ", fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); } else { diff --git a/fs/mpage.c b/fs/mpage.c index a63620cdb73a..ccba3c4c4479 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -62,7 +62,7 @@ static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio) { bio->bi_end_io = mpage_end_io; bio_set_op_attrs(bio, op, op_flags); - guard_bio_eod(op, bio); + guard_bio_eod(bio); submit_bio(bio); return NULL; } diff --git a/fs/namei.c b/fs/namei.c index d6c91d1e88cb..d2720dc71d0e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1232,6 +1232,7 @@ static int follow_managed(struct path *path, struct nameidata *nd) BUG_ON(!path->dentry->d_op); BUG_ON(!path->dentry->d_op->d_manage); ret = path->dentry->d_op->d_manage(path, false); + flags = smp_load_acquire(&path->dentry->d_flags); if (ret < 0) break; } @@ -1649,17 +1650,15 @@ again: if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { - if (!(flags & LOOKUP_NO_REVAL)) { - int error = d_revalidate(dentry, flags); - if (unlikely(error <= 0)) { - if (!error) { - d_invalidate(dentry); - dput(dentry); - goto again; - } + int error = d_revalidate(dentry, flags); + if (unlikely(error <= 0)) { + if (!error) { + d_invalidate(dentry); dput(dentry); - dentry = ERR_PTR(error); + goto again; } + dput(dentry); + dentry = ERR_PTR(error); } } else { old = inode->i_op->lookup(inode, dentry, flags); @@ -2618,72 +2617,6 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags, EXPORT_SYMBOL(user_path_at_empty); /** - * mountpoint_last - look up last component for umount - * @nd: pathwalk nameidata - currently pointing at parent directory of "last" - * - * This is a special lookup_last function just for umount. In this case, we - * need to resolve the path without doing any revalidation. - * - * The nameidata should be the result of doing a LOOKUP_PARENT pathwalk. Since - * mountpoints are always pinned in the dcache, their ancestors are too. Thus, - * in almost all cases, this lookup will be served out of the dcache. The only - * cases where it won't are if nd->last refers to a symlink or the path is - * bogus and it doesn't exist. - * - * Returns: - * -error: if there was an error during lookup. This includes -ENOENT if the - * lookup found a negative dentry. - * - * 0: if we successfully resolved nd->last and found it to not to be a - * symlink that needs to be followed. - * - * 1: if we successfully resolved nd->last and found it to be a symlink - * that needs to be followed. - */ -static int -mountpoint_last(struct nameidata *nd) -{ - int error = 0; - struct dentry *dir = nd->path.dentry; - struct path path; - - /* If we're in rcuwalk, drop out of it to handle last component */ - if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd)) - return -ECHILD; - } - - nd->flags &= ~LOOKUP_PARENT; - - if (unlikely(nd->last_type != LAST_NORM)) { - error = handle_dots(nd, nd->last_type); - if (error) - return error; - path.dentry = dget(nd->path.dentry); - } else { - path.dentry = d_lookup(dir, &nd->last); - if (!path.dentry) { - /* - * No cached dentry. Mounted dentries are pinned in the - * cache, so that means that this dentry is probably - * a symlink or the path doesn't actually point - * to a mounted dentry. - */ - path.dentry = lookup_slow(&nd->last, dir, - nd->flags | LOOKUP_NO_REVAL); - if (IS_ERR(path.dentry)) - return PTR_ERR(path.dentry); - } - } - if (d_flags_negative(smp_load_acquire(&path.dentry->d_flags))) { - dput(path.dentry); - return -ENOENT; - } - path.mnt = nd->path.mnt; - return step_into(nd, &path, 0, d_backing_inode(path.dentry), 0); -} - -/** * path_mountpoint - look up a path to be umounted * @nd: lookup context * @flags: lookup flags @@ -2699,14 +2632,17 @@ path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path) int err; while (!(err = link_path_walk(s, nd)) && - (err = mountpoint_last(nd)) > 0) { + (err = lookup_last(nd)) > 0) { s = trailing_symlink(nd); } + if (!err && (nd->flags & LOOKUP_RCU)) + err = unlazy_walk(nd); + if (!err) + err = handle_lookup_down(nd); if (!err) { *path = nd->path; nd->path.mnt = NULL; nd->path.dentry = NULL; - follow_mount(path); } terminate_walk(nd); return err; diff --git a/fs/namespace.c b/fs/namespace.c index be601d3a8008..5e1bf611a9eb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1728,7 +1728,7 @@ static bool is_mnt_ns_file(struct dentry *dentry) dentry->d_fsdata == &mntns_operations; } -struct mnt_namespace *to_mnt_ns(struct ns_common *ns) +static struct mnt_namespace *to_mnt_ns(struct ns_common *ns) { return container_of(ns, struct mnt_namespace, ns); } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index f64a33d2a1d1..2a82dcce5fc1 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -206,7 +206,6 @@ TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT); TRACE_DEFINE_ENUM(LOOKUP_PARENT); TRACE_DEFINE_ENUM(LOOKUP_REVAL); TRACE_DEFINE_ENUM(LOOKUP_RCU); -TRACE_DEFINE_ENUM(LOOKUP_NO_REVAL); TRACE_DEFINE_ENUM(LOOKUP_OPEN); TRACE_DEFINE_ENUM(LOOKUP_CREATE); TRACE_DEFINE_ENUM(LOOKUP_EXCL); @@ -224,7 +223,6 @@ TRACE_DEFINE_ENUM(LOOKUP_DOWN); { LOOKUP_PARENT, "PARENT" }, \ { LOOKUP_REVAL, "REVAL" }, \ { LOOKUP_RCU, "RCU" }, \ - { LOOKUP_NO_REVAL, "NO_REVAL" }, \ { LOOKUP_OPEN, "OPEN" }, \ { LOOKUP_CREATE, "CREATE" }, \ { LOOKUP_EXCL, "EXCL" }, \ diff --git a/fs/nsfs.c b/fs/nsfs.c index a0431642c6b5..f75767bd623a 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -3,6 +3,7 @@ #include <linux/pseudo_fs.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/magic.h> #include <linux/ktime.h> @@ -11,6 +12,8 @@ #include <linux/nsfs.h> #include <linux/uaccess.h> +#include "internal.h" + static struct vfsmount *nsfs_mnt; static long ns_ioctl(struct file *filp, unsigned int ioctl, diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 1c4c51f3df60..cda1027d0819 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3282,6 +3282,7 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb) debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root, &dlm_debug->d_filter_secs); + ocfs2_get_dlm_debug(dlm_debug); } static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 1afe57f425a0..68ba354cf361 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1066,6 +1066,14 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); + if (replayed) { + jbd2_journal_lock_updates(journal->j_journal); + status = jbd2_journal_flush(journal->j_journal); + jbd2_journal_unlock_updates(journal->j_journal); + if (status < 0) + mlog_errno(status); + } + status = ocfs2_journal_toggle_dirty(osb, 1, replayed); if (status < 0) { mlog_errno(status); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 84ad1c90d535..249672bf54fe 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -631,12 +631,15 @@ EXPORT_SYMBOL_GPL(posix_acl_create); /** * posix_acl_update_mode - update mode in set_acl + * @inode: target inode + * @mode_p: mode (pointer) for update + * @acl: acl pointer * * Update the file mode when setting an ACL: compute the new file permission * bits based on the ACL. In addition, if the ACL is equivalent to the new - * file mode, set *acl to NULL to indicate that no ACL should be set. + * file mode, set *@acl to NULL to indicate that no ACL should be set. * - * As with chmod, clear the setgit bit if the caller is not in the owning group + * As with chmod, clear the setgid bit if the caller is not in the owning group * or capable of CAP_FSETID (see inode_change_ok). * * Called from set_acl inode operations. diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 8caff834f002..013486b5125e 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -407,6 +407,17 @@ static int notrace ramoops_pstore_write(struct pstore_record *record) prz = cxt->dprzs[cxt->dump_write_cnt]; + /* + * Since this is a new crash dump, we need to reset the buffer in + * case it still has an old dump present. Without this, the new dump + * will get appended, which would seriously confuse anything trying + * to check dump file contents. Specifically, ramoops_read_kmsg_hdr() + * expects to find a dump header in the beginning of buffer data, so + * we must to reset the buffer values, in order to ensure that the + * header will be written to the beginning of the buffer. + */ + persistent_ram_zap(prz); + /* Build header and append record contents. */ hlen = ramoops_write_kmsg_hdr(prz, record); if (!hlen) @@ -572,6 +583,7 @@ static int ramoops_init_przs(const char *name, prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig, &cxt->ecc_info, cxt->memtype, flags, label); + kfree(label); if (IS_ERR(prz_ar[i])) { err = PTR_ERR(prz_ar[i]); dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", @@ -617,6 +629,7 @@ static int ramoops_init_prz(const char *name, label = kasprintf(GFP_KERNEL, "ramoops:%s", name); *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype, PRZ_FLAG_ZAP_OLD, label); + kfree(label); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 8823f65888f0..1f4d8c06f9be 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -574,7 +574,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, /* Initialize general buffer state. */ raw_spin_lock_init(&prz->buffer_lock); prz->flags = flags; - prz->label = label; + prz->label = kstrdup(label, GFP_KERNEL); ret = persistent_ram_buffer_map(start, size, prz, memtype); if (ret) |