diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/backref.c | 2 | ||||
-rw-r--r-- | fs/btrfs/block-group.c | 20 | ||||
-rw-r--r-- | fs/btrfs/discard.h | 2 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 36 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 1 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 13 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 43 | ||||
-rw-r--r-- | fs/ceph/caps.c | 3 | ||||
-rw-r--r-- | fs/ceph/debugfs.c | 2 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 8 | ||||
-rw-r--r-- | fs/ceph/quota.c | 4 | ||||
-rw-r--r-- | fs/configfs/dir.c | 1 | ||||
-rw-r--r-- | fs/coredump.c | 8 | ||||
-rw-r--r-- | fs/eventpoll.c | 61 | ||||
-rw-r--r-- | fs/io_uring.c | 123 | ||||
-rw-r--r-- | fs/ioctl.c | 8 | ||||
-rw-r--r-- | fs/iomap/fiemap.c | 5 | ||||
-rw-r--r-- | fs/nfs/nfs3acl.c | 22 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 11 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 2 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 11 | ||||
-rw-r--r-- | fs/nfs/super.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/dlmfs/dlmfs.c | 27 | ||||
-rw-r--r-- | fs/pnode.c | 9 | ||||
-rw-r--r-- | fs/splice.c | 45 | ||||
-rw-r--r-- | fs/super.c | 2 | ||||
-rw-r--r-- | fs/vboxsf/super.c | 2 |
27 files changed, 279 insertions, 194 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 9c380e7edf62..0cc02577577b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -391,7 +391,7 @@ static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr) struct rb_node **p = &preftrees->direct.root.rb_root.rb_node; struct rb_node *parent = NULL; struct prelim_ref *ref = NULL; - struct prelim_ref target = {0}; + struct prelim_ref target = {}; int result; target.parent = bytenr; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 47f66c6a7d7f..696f47103cfc 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -916,7 +916,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out; + goto out_put_group; } /* @@ -954,7 +954,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out; + goto out_put_group; } clear_nlink(inode); /* One for the block groups ref */ @@ -977,13 +977,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out; + goto out_put_group; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out; + goto out_put_group; btrfs_release_path(path); } @@ -1102,9 +1102,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = remove_block_group_free_space(trans, block_group); if (ret) - goto out; + goto out_put_group; - btrfs_put_block_group(block_group); + /* Once for the block groups rbtree */ btrfs_put_block_group(block_group); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -1127,6 +1127,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, /* once for the tree */ free_extent_map(em); } + +out_put_group: + /* Once for the lookup reference */ + btrfs_put_block_group(block_group); out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); @@ -1288,11 +1292,15 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans, if (ret) goto err; mutex_unlock(&fs_info->unused_bg_unpin_mutex); + if (prev_trans) + btrfs_put_transaction(prev_trans); return true; err: mutex_unlock(&fs_info->unused_bg_unpin_mutex); + if (prev_trans) + btrfs_put_transaction(prev_trans); btrfs_dec_block_group_ro(bg); return false; } diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h index 21a15776dac4..353228d62f5a 100644 --- a/fs/btrfs/discard.h +++ b/fs/btrfs/discard.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef BTRFS_DISCARD_H #define BTRFS_DISCARD_H diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a6cb5cbbdb9f..d10c7be10f3b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2036,9 +2036,6 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) for (i = 0; i < ret; i++) btrfs_drop_and_free_fs_root(fs_info, gang[i]); } - - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) - btrfs_free_log_root_tree(NULL, fs_info); } static void btrfs_init_scrub(struct btrfs_fs_info *fs_info) @@ -3888,7 +3885,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->fs_roots_radix_lock); if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { - btrfs_free_log(NULL, root); + ASSERT(root->log_root == NULL); if (root->reloc_root) { btrfs_put_root(root->reloc_root); root->reloc_root = NULL; @@ -4211,6 +4208,36 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) up_write(&fs_info->cleanup_work_sem); } +static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *gang[8]; + u64 root_objectid = 0; + int ret; + + spin_lock(&fs_info->fs_roots_radix_lock); + while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, root_objectid, + ARRAY_SIZE(gang))) != 0) { + int i; + + for (i = 0; i < ret; i++) + gang[i] = btrfs_grab_root(gang[i]); + spin_unlock(&fs_info->fs_roots_radix_lock); + + for (i = 0; i < ret; i++) { + if (!gang[i]) + continue; + root_objectid = gang[i]->root_key.objectid; + btrfs_free_log(NULL, gang[i]); + btrfs_put_root(gang[i]); + } + root_objectid++; + spin_lock(&fs_info->fs_roots_radix_lock); + } + spin_unlock(&fs_info->fs_roots_radix_lock); + btrfs_free_log_root_tree(NULL, fs_info); +} + static void btrfs_destroy_ordered_extents(struct btrfs_root *root) { struct btrfs_ordered_extent *ordered; @@ -4603,6 +4630,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) btrfs_destroy_delayed_inodes(fs_info); btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_all_delalloc_inodes(fs_info); + btrfs_drop_all_logs(fs_info); mutex_unlock(&fs_info->transaction_kthread_mutex); return 0; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d35936c934ab..03bc7134e8cb 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4559,6 +4559,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) if (IS_ERR(fs_root)) { err = PTR_ERR(fs_root); list_add_tail(&reloc_root->root_list, &reloc_roots); + btrfs_end_transaction(trans); goto out_unset; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8cede6eb9843..2d5498136e5e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -662,10 +662,19 @@ again: } got_it: - btrfs_record_root_in_trans(h, root); - if (!current->journal_info) current->journal_info = h; + + /* + * btrfs_record_root_in_trans() needs to alloc new extents, and may + * call btrfs_join_transaction() while we're also starting a + * transaction. + * + * Thus it need to be called after current->journal_info initialized, + * or we can deadlock. + */ + btrfs_record_root_in_trans(h, root); + return h; join_fail: diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ec36a7c6ba3d..02ebdd9edc19 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4226,6 +4226,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; bool dropped_extents = false; + u64 truncate_offset = i_size; + struct extent_buffer *leaf; + int slot; int ins_nr = 0; int start_slot; int ret; @@ -4240,9 +4243,43 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (ret < 0) goto out; + /* + * We must check if there is a prealloc extent that starts before the + * i_size and crosses the i_size boundary. This is to ensure later we + * truncate down to the end of that extent and not to the i_size, as + * otherwise we end up losing part of the prealloc extent after a log + * replay and with an implicit hole if there is another prealloc extent + * that starts at an offset beyond i_size. + */ + ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY); + if (ret < 0) + goto out; + + if (ret == 0) { + struct btrfs_file_extent_item *ei; + + leaf = path->nodes[0]; + slot = path->slots[0]; + ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) == + BTRFS_FILE_EXTENT_PREALLOC) { + u64 extent_end; + + btrfs_item_key_to_cpu(leaf, &key, slot); + extent_end = key.offset + + btrfs_file_extent_num_bytes(leaf, ei); + + if (extent_end > i_size) + truncate_offset = extent_end; + } + } else { + ret = 0; + } + while (true) { - struct extent_buffer *leaf = path->nodes[0]; - int slot = path->slots[0]; + leaf = path->nodes[0]; + slot = path->slots[0]; if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { @@ -4280,7 +4317,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, ret = btrfs_truncate_inode_items(trans, root->log_root, &inode->vfs_inode, - i_size, + truncate_offset, BTRFS_EXTENT_DATA_KEY); } while (ret == -EAGAIN); if (ret) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 185db76300b3..5f3aa4d607de 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2749,7 +2749,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want, ret = try_get_cap_refs(inode, need, want, 0, flags, got); /* three special error codes */ - if (ret == -EAGAIN || ret == -EFBIG || ret == -EAGAIN) + if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE) ret = 0; return ret; } @@ -3746,6 +3746,7 @@ retry: WARN_ON(1); tsession = NULL; target = -1; + mutex_lock(&session->s_mutex); } goto retry; diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 481ac97b4d25..dcaed75de9e6 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &congestion_kb_fops); snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->sb->s_bdi->dev)); + bdi_dev_name(fsc->sb->s_bdi)); fsc->debugfs_bdi = debugfs_create_symlink("bdi", fsc->client->debugfs_dir, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 486f91f9685b..7c63abf5bea9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3251,8 +3251,7 @@ static void handle_session(struct ceph_mds_session *session, void *end = p + msg->front.iov_len; struct ceph_mds_session_head *h; u32 op; - u64 seq; - unsigned long features = 0; + u64 seq, features = 0; int wake = 0; bool blacklisted = false; @@ -3271,9 +3270,8 @@ static void handle_session(struct ceph_mds_session *session, goto bad; /* version >= 3, feature bits */ ceph_decode_32_safe(&p, end, len, bad); - ceph_decode_need(&p, end, len, bad); - memcpy(&features, p, min_t(size_t, len, sizeof(features))); - p += len; + ceph_decode_64_safe(&p, end, features, bad); + p += len - sizeof(features); } mutex_lock(&mdsc->mutex); diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index de56dee60540..19507e2fdb57 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -159,8 +159,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, } if (IS_ERR(in)) { - pr_warn("Can't lookup inode %llx (err: %ld)\n", - realm->ino, PTR_ERR(in)); + dout("Can't lookup inode %llx (err: %ld)\n", + realm->ino, PTR_ERR(in)); qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ } else { qri->timeout = 0; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index cf7b7e1d5bd7..cb733652ecca 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1519,6 +1519,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) spin_lock(&configfs_dirent_lock); configfs_detach_rollback(dentry); spin_unlock(&configfs_dirent_lock); + config_item_put(parent_item); return -EINTR; } frag->frag_dead = true; diff --git a/fs/coredump.c b/fs/coredump.c index 408418e6aa13..478a0d810136 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -788,6 +788,14 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (displaced) put_files_struct(displaced); if (!dump_interrupted()) { + /* + * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would + * have this set to NULL. + */ + if (!cprm.file) { + pr_info("Core dump to |%s disabled\n", cn.corename); + goto close_fail; + } file_start_write(cprm.file); core_dumped = binfmt->core_dump(&cprm); file_end_write(cprm.file); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8c596641a72b..aba03ee749f8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi) { struct eventpoll *ep = epi->ep; + /* Fast preliminary check */ + if (epi->next != EP_UNACTIVE_PTR) + return false; + /* Check that the same epi has not been just chained from another CPU */ if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) return false; @@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * chained in ep->ovflist and requeued later on. */ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { - if (epi->next == EP_UNACTIVE_PTR && - chain_epi_lockless(epi)) + if (chain_epi_lockless(epi)) + ep_pm_stay_awake_rcu(epi); + } else if (!ep_is_linked(epi)) { + /* In the usual case, add event to ready list. */ + if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) ep_pm_stay_awake_rcu(epi); - goto out_unlock; - } - - /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(epi) && - list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) { - ep_pm_stay_awake_rcu(epi); } /* @@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, { int res = 0, eavail, timed_out = 0; u64 slack = 0; - bool waiter = false; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1867,21 +1866,23 @@ fetch_events: */ ep_reset_busy_poll_napi_id(ep); - /* - * We don't have any available event to return to the caller. We need - * to sleep here, and we will be woken by ep_poll_callback() when events - * become available. - */ - if (!waiter) { - waiter = true; - init_waitqueue_entry(&wait, current); - + do { + /* + * Internally init_wait() uses autoremove_wake_function(), + * thus wait entry is removed from the wait queue on each + * wakeup. Why it is important? In case of several waiters + * each new wakeup will hit the next waiter, giving it the + * chance to harvest new event. Otherwise wakeup can be + * lost. This is also good performance-wise, because on + * normal wakeup path no need to call __remove_wait_queue() + * explicitly, thus ep->lock is not taken, which halts the + * event delivery. + */ + init_wait(&wait); write_lock_irq(&ep->lock); __add_wait_queue_exclusive(&ep->wq, &wait); write_unlock_irq(&ep->lock); - } - for (;;) { /* * We don't want to sleep if the ep_poll_callback() sends us * a wakeup in between. That's why we set the task state @@ -1911,10 +1912,20 @@ fetch_events: timed_out = 1; break; } - } + + /* We were woken up, thus go and try to harvest some events */ + eavail = 1; + + } while (0); __set_current_state(TASK_RUNNING); + if (!list_empty_careful(&wait.entry)) { + write_lock_irq(&ep->lock); + __remove_wait_queue(&ep->wq, &wait); + write_unlock_irq(&ep->lock); + } + send_events: /* * Try to transfer events to user space. In case we get 0 events and @@ -1925,12 +1936,6 @@ send_events: !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto fetch_events; - if (waiter) { - write_lock_irq(&ep->lock); - __remove_wait_queue(&ep->wq, &wait); - write_unlock_irq(&ep->lock); - } - return res; } diff --git a/fs/io_uring.c b/fs/io_uring.c index c687f57fb651..979d9f977409 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -524,6 +524,7 @@ enum { REQ_F_OVERFLOW_BIT, REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, + REQ_F_NO_FILE_TABLE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -577,6 +578,8 @@ enum { REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), /* buffer already selected */ REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + /* doesn't need file table for this request */ + REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), }; struct async_poll { @@ -677,8 +680,6 @@ struct io_op_def { unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; - /* needs req->file assigned IFF fd is >= 0 */ - unsigned fd_non_neg : 1; /* hash wq insertion if file is a regular file */ unsigned hash_reg_file : 1; /* unbound wq insertion if file is a non-regular file */ @@ -781,8 +782,6 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, }, [IORING_OP_OPENAT] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -796,9 +795,8 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_STATX] = { .needs_mm = 1, - .needs_file = 1, - .fd_non_neg = 1, .needs_fs = 1, + .file_table = 1, }, [IORING_OP_READ] = { .needs_mm = 1, @@ -833,8 +831,6 @@ static const struct io_op_def io_op_defs[] = { .buffer_select = 1, }, [IORING_OP_OPENAT2] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -1291,7 +1287,7 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx) struct io_kiocb *req; req = ctx->fallback_req; - if (!test_and_set_bit_lock(0, (unsigned long *) ctx->fallback_req)) + if (!test_and_set_bit_lock(0, (unsigned long *) &ctx->fallback_req)) return req; return NULL; @@ -1378,7 +1374,7 @@ static void __io_free_req(struct io_kiocb *req) if (likely(!io_is_fallback_req(req))) kmem_cache_free(req_cachep, req); else - clear_bit_unlock(0, (unsigned long *) req->ctx->fallback_req); + clear_bit_unlock(0, (unsigned long *) &req->ctx->fallback_req); } struct req_batch { @@ -2034,7 +2030,7 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd) * any file. For now, just ensure that anything potentially problematic is done * inline. */ -static bool io_file_supports_async(struct file *file) +static bool io_file_supports_async(struct file *file, int rw) { umode_t mode = file_inode(file)->i_mode; @@ -2043,7 +2039,13 @@ static bool io_file_supports_async(struct file *file) if (S_ISREG(mode) && file->f_op != &io_uring_fops) return true; - return false; + if (!(file->f_mode & FMODE_NOWAIT)) + return false; + + if (rw == READ) + return file->f_op->read_iter != NULL; + + return file->f_op->write_iter != NULL; } static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, @@ -2571,7 +2573,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock) * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so * we know to async punt it even if it was opened O_NONBLOCK */ - if (force_nonblock && !io_file_supports_async(req->file)) + if (force_nonblock && !io_file_supports_async(req->file, READ)) goto copy_iov; iov_count = iov_iter_count(&iter); @@ -2594,7 +2596,8 @@ copy_iov: if (ret) goto out_free; /* any defer here is final, must blocking retry */ - if (!(req->flags & REQ_F_NOWAIT)) + if (!(req->flags & REQ_F_NOWAIT) && + !file_can_poll(req->file)) req->flags |= REQ_F_MUST_PUNT; return -EAGAIN; } @@ -2662,7 +2665,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock) * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so * we know to async punt it even if it was opened O_NONBLOCK */ - if (force_nonblock && !io_file_supports_async(req->file)) + if (force_nonblock && !io_file_supports_async(req->file, WRITE)) goto copy_iov; /* file path doesn't support NOWAIT for non-direct_IO */ @@ -2716,7 +2719,8 @@ copy_iov: if (ret) goto out_free; /* any defer here is final, must blocking retry */ - req->flags |= REQ_F_MUST_PUNT; + if (!file_can_poll(req->file)) + req->flags |= REQ_F_MUST_PUNT; return -EAGAIN; } } @@ -2756,15 +2760,6 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static bool io_splice_punt(struct file *file) -{ - if (get_pipe_info(file)) - return false; - if (!io_file_supports_async(file)) - return true; - return !(file->f_flags & O_NONBLOCK); -} - static int io_splice(struct io_kiocb *req, bool force_nonblock) { struct io_splice *sp = &req->splice; @@ -2774,11 +2769,8 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock) loff_t *poff_in, *poff_out; long ret; - if (force_nonblock) { - if (io_splice_punt(in) || io_splice_punt(out)) - return -EAGAIN; - flags |= SPLICE_F_NONBLOCK; - } + if (force_nonblock) + return -EAGAIN; poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; @@ -3355,8 +3347,12 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock) struct kstat stat; int ret; - if (force_nonblock) + if (force_nonblock) { + /* only need file table for an actual valid fd */ + if (ctx->dfd == -1 || ctx->dfd == AT_FDCWD) + req->flags |= REQ_F_NO_FILE_TABLE; return -EAGAIN; + } if (vfs_stat_set_lookup_flags(&lookup_flags, ctx->how.flags)) return -EINVAL; @@ -3502,7 +3498,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr) if (io_req_cancelled(req)) return; __io_sync_file_range(req); - io_put_req(req); /* put submission ref */ + io_steal_work(req, workptr); } static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) @@ -5015,7 +5011,7 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe) int ret; /* Still need defer if there is pending req in defer list. */ - if (!req_need_defer(req) && list_empty(&ctx->defer_list)) + if (!req_need_defer(req) && list_empty_careful(&ctx->defer_list)) return 0; if (!req->io && io_alloc_async_ctx(req)) @@ -5364,15 +5360,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr) io_steal_work(req, workptr); } -static int io_req_needs_file(struct io_kiocb *req, int fd) -{ - if (!io_op_defs[req->opcode].needs_file) - return 0; - if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg) - return 0; - return 1; -} - static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, int index) { @@ -5410,14 +5397,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, } static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, - int fd, unsigned int flags) + int fd) { bool fixed; - if (!io_req_needs_file(req, fd)) - return 0; - - fixed = (flags & IOSQE_FIXED_FILE); + fixed = (req->flags & REQ_F_FIXED_FILE) != 0; if (unlikely(!fixed && req->needs_fixed_file)) return -EBADF; @@ -5429,7 +5413,7 @@ static int io_grab_files(struct io_kiocb *req) int ret = -EBADF; struct io_ring_ctx *ctx = req->ctx; - if (req->work.files) + if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE)) return 0; if (!ctx->ring_file) return -EBADF; @@ -5794,7 +5778,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, struct io_submit_state *state, bool async) { unsigned int sqe_flags; - int id, fd; + int id; /* * All io need record the previous position, if LINK vs DARIN, @@ -5846,8 +5830,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, IOSQE_ASYNC | IOSQE_FIXED_FILE | IOSQE_BUFFER_SELECT | IOSQE_IO_LINK); - fd = READ_ONCE(sqe->fd); - return io_req_set_file(state, req, fd, sqe_flags); + if (!io_op_defs[req->opcode].needs_file) + return 0; + + return io_req_set_file(state, req, READ_ONCE(sqe->fd)); } static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, @@ -7327,7 +7313,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) * it could cause shutdown to hang. */ while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait)) - cpu_relax(); + cond_resched(); io_kill_timeouts(ctx); io_poll_remove_all(ctx); @@ -7356,11 +7342,9 @@ static int io_uring_release(struct inode *inode, struct file *file) static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { - struct io_kiocb *req; - DEFINE_WAIT(wait); - while (!list_empty_careful(&ctx->inflight_list)) { - struct io_kiocb *cancel_req = NULL; + struct io_kiocb *cancel_req = NULL, *req; + DEFINE_WAIT(wait); spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { @@ -7400,6 +7384,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, */ if (refcount_sub_and_test(2, &cancel_req->refs)) { io_put_req(cancel_req); + finish_wait(&ctx->inflight_wait, &wait); continue; } } @@ -7407,8 +7392,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_wq_cancel_work(ctx->io_wq, &cancel_req->work); io_put_req(cancel_req); schedule(); + finish_wait(&ctx->inflight_wait, &wait); } - finish_wait(&ctx->inflight_wait, &wait); } static int io_uring_flush(struct file *file, void *data) @@ -7757,7 +7742,8 @@ err: return ret; } -static int io_uring_create(unsigned entries, struct io_uring_params *p) +static int io_uring_create(unsigned entries, struct io_uring_params *p, + struct io_uring_params __user *params) { struct user_struct *user = NULL; struct io_ring_ctx *ctx; @@ -7849,6 +7835,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); p->cq_off.cqes = offsetof(struct io_rings, cqes); + p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | + IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | + IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; + + if (copy_to_user(params, p, sizeof(*p))) { + ret = -EFAULT; + goto err; + } /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup @@ -7857,9 +7851,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) if (ret < 0) goto err; - p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | - IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | - IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: @@ -7875,7 +7866,6 @@ err: static long io_uring_setup(u32 entries, struct io_uring_params __user *params) { struct io_uring_params p; - long ret; int i; if (copy_from_user(&p, params, sizeof(p))) @@ -7890,14 +7880,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ)) return -EINVAL; - ret = io_uring_create(entries, &p); - if (ret < 0) - return ret; - - if (copy_to_user(params, &p, sizeof(p))) - return -EFAULT; - - return ret; + return io_uring_create(entries, &p, params); } SYSCALL_DEFINE2(io_uring_setup, u32, entries, diff --git a/fs/ioctl.c b/fs/ioctl.c index 282d45be6f45..5e80b40bc1b5 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -55,6 +55,7 @@ EXPORT_SYMBOL(vfs_ioctl); static int ioctl_fibmap(struct file *filp, int __user *p) { struct inode *inode = file_inode(filp); + struct super_block *sb = inode->i_sb; int error, ur_block; sector_t block; @@ -71,6 +72,13 @@ static int ioctl_fibmap(struct file *filp, int __user *p) block = ur_block; error = bmap(inode, &block); + if (block > INT_MAX) { + error = -ERANGE; + pr_warn_ratelimited("[%s/%d] FS: %s File: %pD4 would truncate fibmap result\n", + current->comm, task_pid_nr(current), + sb->s_id, filp); + } + if (error) ur_block = 0; else diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c index bccf305ea9ce..d55e8f491a5e 100644 --- a/fs/iomap/fiemap.c +++ b/fs/iomap/fiemap.c @@ -117,10 +117,7 @@ iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length, if (iomap->type == IOMAP_MAPPED) { addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits; - if (addr > INT_MAX) - WARN(1, "would truncate bmap result\n"); - else - *bno = addr; + *bno = addr; } return 0; } diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index c5c3fc6e6c60..26c94b32d6f4 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -253,37 +253,45 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct posix_acl *alloc = NULL, *dfacl = NULL; + struct posix_acl *orig = acl, *dfacl = NULL, *alloc; int status; if (S_ISDIR(inode->i_mode)) { switch(type) { case ACL_TYPE_ACCESS: - alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT); + alloc = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(alloc)) goto fail; + dfacl = alloc; break; case ACL_TYPE_DEFAULT: - dfacl = acl; - alloc = acl = get_acl(inode, ACL_TYPE_ACCESS); + alloc = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(alloc)) goto fail; + dfacl = acl; + acl = alloc; break; } } if (acl == NULL) { - alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + alloc = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(alloc)) goto fail; + acl = alloc; } status = __nfs3_proc_setacls(inode, acl, dfacl); - posix_acl_release(alloc); +out: + if (acl != orig) + posix_acl_release(acl); + if (dfacl != orig) + posix_acl_release(dfacl); return status; fail: - return PTR_ERR(alloc); + status = PTR_ERR(alloc); + goto out; } const struct xattr_handler *nfs3_xattr_handlers[] = { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 512afb1c7867..a0c1e653a935 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7891,6 +7891,7 @@ static void nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) { struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp; + struct nfs41_bind_conn_to_session_res *res = task->tk_msg.rpc_resp; struct nfs_client *clp = args->client; switch (task->tk_status) { @@ -7899,6 +7900,12 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); } + if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && + res->dir != NFS4_CDFS4_BOTH) { + rpc_task_close_connection(task); + if (args->retries++ < MAX_BIND_CONN_TO_SESSION_RETRIES) + rpc_restart_call(task); + } } static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { @@ -7921,6 +7928,7 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt, struct nfs41_bind_conn_to_session_args args = { .client = clp, .dir = NFS4_CDFC4_FORE_OR_BOTH, + .retries = 0, }; struct nfs41_bind_conn_to_session_res res; struct rpc_message msg = { @@ -9191,8 +9199,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return ERR_CAST(task); + status = rpc_wait_for_completion_task(task); if (status != 0) goto out; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ac93715c05a4..a8dc25ce48bb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -734,9 +734,9 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) state = new; state->owner = owner; atomic_inc(&owner->so_count); - list_add_rcu(&state->inode_states, &nfsi->open_states); ihold(inode); state->inode = inode; + list_add_rcu(&state->inode_states, &nfsi->open_states); spin_unlock(&inode->i_lock); /* Note: The reclaim code dictates that we add stateless * and read-only stateids to the end of the list */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b8d78f393365..dd2e14f5875d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1332,13 +1332,15 @@ _pnfs_return_layout(struct inode *ino) !valid_layout) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout segments to return\n", __func__); - goto out_put_layout_hdr; + goto out_wait_layoutreturn; } send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL); spin_unlock(&ino->i_lock); if (send) status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true); +out_wait_layoutreturn: + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); out_put_layout_hdr: pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); @@ -1456,18 +1458,15 @@ retry: /* lo ref dropped in pnfs_roc_release() */ layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode); /* If the creds don't match, we can't compound the layoutreturn */ - if (!layoutreturn) + if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0) goto out_noroc; - if (cred_fscmp(cred, lc_cred) != 0) - goto out_noroc_put_cred; roc = layoutreturn; pnfs_init_layoutreturn_args(args, lo, &stateid, iomode); res->lrs_present = 0; layoutreturn = false; - -out_noroc_put_cred: put_cred(lc_cred); + out_noroc: spin_unlock(&ino->i_lock); rcu_read_unlock(); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index cc34aa3a8ba4..7a70287f21a2 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -185,7 +185,7 @@ static int __nfs_list_for_each_server(struct list_head *head, rcu_read_lock(); list_for_each_entry_rcu(server, head, client_link) { - if (!nfs_sb_active(server->super)) + if (!(server->super && nfs_sb_active(server->super))) continue; rcu_read_unlock(); if (last) diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 8e4f1ace467c..1de77f1a600b 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -275,7 +275,6 @@ static ssize_t dlmfs_file_write(struct file *filp, loff_t *ppos) { int bytes_left; - ssize_t writelen; char *lvb_buf; struct inode *inode = file_inode(filp); @@ -285,32 +284,30 @@ static ssize_t dlmfs_file_write(struct file *filp, if (*ppos >= i_size_read(inode)) return -ENOSPC; + /* don't write past the lvb */ + if (count > i_size_read(inode) - *ppos) + count = i_size_read(inode) - *ppos; + if (!count) return 0; if (!access_ok(buf, count)) return -EFAULT; - /* don't write past the lvb */ - if ((count + *ppos) > i_size_read(inode)) - writelen = i_size_read(inode) - *ppos; - else - writelen = count - *ppos; - - lvb_buf = kmalloc(writelen, GFP_NOFS); + lvb_buf = kmalloc(count, GFP_NOFS); if (!lvb_buf) return -ENOMEM; - bytes_left = copy_from_user(lvb_buf, buf, writelen); - writelen -= bytes_left; - if (writelen) - user_dlm_write_lvb(inode, lvb_buf, writelen); + bytes_left = copy_from_user(lvb_buf, buf, count); + count -= bytes_left; + if (count) + user_dlm_write_lvb(inode, lvb_buf, count); kfree(lvb_buf); - *ppos = *ppos + writelen; - mlog(0, "wrote %zd bytes\n", writelen); - return writelen; + *ppos = *ppos + count; + mlog(0, "wrote %zu bytes\n", count); + return count; } static void dlmfs_init_once(void *foo) diff --git a/fs/pnode.c b/fs/pnode.c index 49f6d7ff2139..1106137c747a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -261,14 +261,13 @@ static int propagate_one(struct mount *m) child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); + read_seqlock_excl(&mount_lock); mnt_set_mountpoint(m, mp, child); + if (m->mnt_master != dest_master) + SET_MNT_MARK(m->mnt_master); + read_sequnlock_excl(&mount_lock); last_dest = m; last_source = child; - if (m->mnt_master != dest_master) { - read_seqlock_excl(&mount_lock); - SET_MNT_MARK(m->mnt_master); - read_sequnlock_excl(&mount_lock); - } hlist_add_head(&child->mnt_hash, list); return count_mounts(m->mnt_ns, child); } diff --git a/fs/splice.c b/fs/splice.c index 4735defc46ee..fd0a1e7e5959 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1118,6 +1118,10 @@ long do_splice(struct file *in, loff_t __user *off_in, loff_t offset; long ret; + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + ipipe = get_pipe_info(in); opipe = get_pipe_info(out); @@ -1125,12 +1129,6 @@ long do_splice(struct file *in, loff_t __user *off_in, if (off_in || off_out) return -ESPIPE; - if (!(in->f_mode & FMODE_READ)) - return -EBADF; - - if (!(out->f_mode & FMODE_WRITE)) - return -EBADF; - /* Splicing to self would be fun, but... */ if (ipipe == opipe) return -EINVAL; @@ -1153,9 +1151,6 @@ long do_splice(struct file *in, loff_t __user *off_in, offset = out->f_pos; } - if (unlikely(!(out->f_mode & FMODE_WRITE))) - return -EBADF; - if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; @@ -1440,15 +1435,11 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, error = -EBADF; in = fdget(fd_in); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - out = fdget(fd_out); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_splice(in.file, off_in, - out.file, off_out, - len, flags); - fdput(out); - } + out = fdget(fd_out); + if (out.file) { + error = do_splice(in.file, off_in, out.file, off_out, + len, flags); + fdput(out); } fdput(in); } @@ -1770,6 +1761,10 @@ static long do_tee(struct file *in, struct file *out, size_t len, struct pipe_inode_info *opipe = get_pipe_info(out); int ret = -EINVAL; + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + /* * Duplicate the contents of ipipe to opipe without actually * copying the data. @@ -1795,7 +1790,7 @@ static long do_tee(struct file *in, struct file *out, size_t len, SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct fd in; + struct fd in, out; int error; if (unlikely(flags & ~SPLICE_F_ALL)) @@ -1807,14 +1802,10 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) error = -EBADF; in = fdget(fdin); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - struct fd out = fdget(fdout); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_tee(in.file, out.file, - len, flags); - fdput(out); - } + out = fdget(fdout); + if (out.file) { + error = do_tee(in.file, out.file, len, flags); + fdput(out); } fdput(in); } diff --git a/fs/super.c b/fs/super.c index cd352530eca9..a288cd60d2ae 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1302,8 +1302,8 @@ int get_tree_bdev(struct fs_context *fc, mutex_lock(&bdev->bd_fsfreeze_mutex); if (bdev->bd_fsfreeze_count > 0) { mutex_unlock(&bdev->bd_fsfreeze_mutex); - blkdev_put(bdev, mode); warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); + blkdev_put(bdev, mode); return -EBUSY; } diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 675e26989376..8fe03b4a0d2b 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_free; } - err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id); if (err) goto fail_free; |