summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c186
-rw-r--r--fs/btrfs/delalloc-space.c12
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/extent_io.c14
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/btrfs/root-tree.c3
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/btrfs/zoned.c2
-rw-r--r--fs/cifs/connect.c11
-rw-r--r--fs/cifs/fscache.c46
-rw-r--r--fs/cifs/inode.c7
-rw-r--r--fs/cifs/sess.c54
-rw-r--r--fs/file.c68
-rw-r--r--fs/gfs2/glock.c10
-rw-r--r--fs/gfs2/inode.c109
-rw-r--r--fs/io-wq.c36
-rw-r--r--fs/io_uring.c6
-rw-r--r--fs/netfs/read_helper.c25
-rw-r--r--fs/nfsd/nfs4recover.c1
-rw-r--r--fs/nfsd/nfs4state.c9
-rw-r--r--fs/nfsd/nfsctl.c14
-rw-r--r--fs/signalfd.c12
-rw-r--r--fs/smbfs_common/cifs_arc4.c13
-rw-r--r--fs/tracefs/inode.c76
-rw-r--r--fs/xfs/xfs_inode.c1
-rw-r--r--fs/xfs/xfs_super.c14
26 files changed, 500 insertions, 243 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 9c81cf611d65..f6f1cbffef9e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -181,8 +181,9 @@ struct poll_iocb {
struct file *file;
struct wait_queue_head *head;
__poll_t events;
- bool done;
bool cancelled;
+ bool work_scheduled;
+ bool work_need_resched;
struct wait_queue_entry wait;
struct work_struct work;
};
@@ -1619,6 +1620,51 @@ static void aio_poll_put_work(struct work_struct *work)
iocb_put(iocb);
}
+/*
+ * Safely lock the waitqueue which the request is on, synchronizing with the
+ * case where the ->poll() provider decides to free its waitqueue early.
+ *
+ * Returns true on success, meaning that req->head->lock was locked, req->wait
+ * is on req->head, and an RCU read lock was taken. Returns false if the
+ * request was already removed from its waitqueue (which might no longer exist).
+ */
+static bool poll_iocb_lock_wq(struct poll_iocb *req)
+{
+ wait_queue_head_t *head;
+
+ /*
+ * While we hold the waitqueue lock and the waitqueue is nonempty,
+ * wake_up_pollfree() will wait for us. However, taking the waitqueue
+ * lock in the first place can race with the waitqueue being freed.
+ *
+ * We solve this as eventpoll does: by taking advantage of the fact that
+ * all users of wake_up_pollfree() will RCU-delay the actual free. If
+ * we enter rcu_read_lock() and see that the pointer to the queue is
+ * non-NULL, we can then lock it without the memory being freed out from
+ * under us, then check whether the request is still on the queue.
+ *
+ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+ * case the caller deletes the entry from the queue, leaving it empty.
+ * In that case, only RCU prevents the queue memory from being freed.
+ */
+ rcu_read_lock();
+ head = smp_load_acquire(&req->head);
+ if (head) {
+ spin_lock(&head->lock);
+ if (!list_empty(&req->wait.entry))
+ return true;
+ spin_unlock(&head->lock);
+ }
+ rcu_read_unlock();
+ return false;
+}
+
+static void poll_iocb_unlock_wq(struct poll_iocb *req)
+{
+ spin_unlock(&req->head->lock);
+ rcu_read_unlock();
+}
+
static void aio_poll_complete_work(struct work_struct *work)
{
struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1638,14 +1684,27 @@ static void aio_poll_complete_work(struct work_struct *work)
* avoid further branches in the fast path.
*/
spin_lock_irq(&ctx->ctx_lock);
- if (!mask && !READ_ONCE(req->cancelled)) {
- add_wait_queue(req->head, &req->wait);
- spin_unlock_irq(&ctx->ctx_lock);
- return;
- }
+ if (poll_iocb_lock_wq(req)) {
+ if (!mask && !READ_ONCE(req->cancelled)) {
+ /*
+ * The request isn't actually ready to be completed yet.
+ * Reschedule completion if another wakeup came in.
+ */
+ if (req->work_need_resched) {
+ schedule_work(&req->work);
+ req->work_need_resched = false;
+ } else {
+ req->work_scheduled = false;
+ }
+ poll_iocb_unlock_wq(req);
+ spin_unlock_irq(&ctx->ctx_lock);
+ return;
+ }
+ list_del_init(&req->wait.entry);
+ poll_iocb_unlock_wq(req);
+ } /* else, POLLFREE has freed the waitqueue, so we must complete */
list_del_init(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask);
- req->done = true;
spin_unlock_irq(&ctx->ctx_lock);
iocb_put(iocb);
@@ -1657,13 +1716,14 @@ static int aio_poll_cancel(struct kiocb *iocb)
struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
struct poll_iocb *req = &aiocb->poll;
- spin_lock(&req->head->lock);
- WRITE_ONCE(req->cancelled, true);
- if (!list_empty(&req->wait.entry)) {
- list_del_init(&req->wait.entry);
- schedule_work(&aiocb->poll.work);
- }
- spin_unlock(&req->head->lock);
+ if (poll_iocb_lock_wq(req)) {
+ WRITE_ONCE(req->cancelled, true);
+ if (!req->work_scheduled) {
+ schedule_work(&aiocb->poll.work);
+ req->work_scheduled = true;
+ }
+ poll_iocb_unlock_wq(req);
+ } /* else, the request was force-cancelled by POLLFREE already */
return 0;
}
@@ -1680,21 +1740,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (mask && !(mask & req->events))
return 0;
- list_del_init(&req->wait.entry);
-
- if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+ /*
+ * Complete the request inline if possible. This requires that three
+ * conditions be met:
+ * 1. An event mask must have been passed. If a plain wakeup was done
+ * instead, then mask == 0 and we have to call vfs_poll() to get
+ * the events, so inline completion isn't possible.
+ * 2. The completion work must not have already been scheduled.
+ * 3. ctx_lock must not be busy. We have to use trylock because we
+ * already hold the waitqueue lock, so this inverts the normal
+ * locking order. Use irqsave/irqrestore because not all
+ * filesystems (e.g. fuse) call this function with IRQs disabled,
+ * yet IRQs have to be disabled before ctx_lock is obtained.
+ */
+ if (mask && !req->work_scheduled &&
+ spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
struct kioctx *ctx = iocb->ki_ctx;
- /*
- * Try to complete the iocb inline if we can. Use
- * irqsave/irqrestore because not all filesystems (e.g. fuse)
- * call this function with IRQs disabled and because IRQs
- * have to be disabled before ctx_lock is obtained.
- */
+ list_del_init(&req->wait.entry);
list_del(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask);
- req->done = true;
- if (iocb->ki_eventfd && eventfd_signal_allowed()) {
+ if (iocb->ki_eventfd && !eventfd_signal_allowed()) {
iocb = NULL;
INIT_WORK(&req->work, aio_poll_put_work);
schedule_work(&req->work);
@@ -1703,7 +1769,43 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (iocb)
iocb_put(iocb);
} else {
- schedule_work(&req->work);
+ /*
+ * Schedule the completion work if needed. If it was already
+ * scheduled, record that another wakeup came in.
+ *
+ * Don't remove the request from the waitqueue here, as it might
+ * not actually be complete yet (we won't know until vfs_poll()
+ * is called), and we must not miss any wakeups. POLLFREE is an
+ * exception to this; see below.
+ */
+ if (req->work_scheduled) {
+ req->work_need_resched = true;
+ } else {
+ schedule_work(&req->work);
+ req->work_scheduled = true;
+ }
+
+ /*
+ * If the waitqueue is being freed early but we can't complete
+ * the request inline, we have to tear down the request as best
+ * we can. That means immediately removing the request from its
+ * waitqueue and preventing all further accesses to the
+ * waitqueue via the request. We also need to schedule the
+ * completion work (done above). Also mark the request as
+ * cancelled, to potentially skip an unneeded call to ->poll().
+ */
+ if (mask & POLLFREE) {
+ WRITE_ONCE(req->cancelled, true);
+ list_del_init(&req->wait.entry);
+
+ /*
+ * Careful: this *must* be the last step, since as soon
+ * as req->head is NULL'ed out, the request can be
+ * completed and freed, since aio_poll_complete_work()
+ * will no longer need to take the waitqueue lock.
+ */
+ smp_store_release(&req->head, NULL);
+ }
}
return 1;
}
@@ -1711,6 +1813,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
struct aio_poll_table {
struct poll_table_struct pt;
struct aio_kiocb *iocb;
+ bool queued;
int error;
};
@@ -1721,11 +1824,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
/* multiple wait queues per file are not supported */
- if (unlikely(pt->iocb->poll.head)) {
+ if (unlikely(pt->queued)) {
pt->error = -EINVAL;
return;
}
+ pt->queued = true;
pt->error = 0;
pt->iocb->poll.head = head;
add_wait_queue(head, &pt->iocb->poll.wait);
@@ -1750,12 +1854,14 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
req->head = NULL;
- req->done = false;
req->cancelled = false;
+ req->work_scheduled = false;
+ req->work_need_resched = false;
apt.pt._qproc = aio_poll_queue_proc;
apt.pt._key = req->events;
apt.iocb = aiocb;
+ apt.queued = false;
apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
/* initialized the list so that we can do list_empty checks */
@@ -1764,23 +1870,35 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
mask = vfs_poll(req->file, &apt.pt) & req->events;
spin_lock_irq(&ctx->ctx_lock);
- if (likely(req->head)) {
- spin_lock(&req->head->lock);
- if (unlikely(list_empty(&req->wait.entry))) {
- if (apt.error)
+ if (likely(apt.queued)) {
+ bool on_queue = poll_iocb_lock_wq(req);
+
+ if (!on_queue || req->work_scheduled) {
+ /*
+ * aio_poll_wake() already either scheduled the async
+ * completion work, or completed the request inline.
+ */
+ if (apt.error) /* unsupported case: multiple queues */
cancel = true;
apt.error = 0;
mask = 0;
}
if (mask || apt.error) {
+ /* Steal to complete synchronously. */
list_del_init(&req->wait.entry);
} else if (cancel) {
+ /* Cancel if possible (may be too late though). */
WRITE_ONCE(req->cancelled, true);
- } else if (!req->done) { /* actually waiting for an event */
+ } else if (on_queue) {
+ /*
+ * Actually waiting for an event, so add the request to
+ * active_reqs so that it can be cancelled if needed.
+ */
list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
aiocb->ki_cancel = aio_poll_cancel;
}
- spin_unlock(&req->head->lock);
+ if (on_queue)
+ poll_iocb_unlock_wq(req);
}
if (mask) { /* no async, we'd stolen it */
aiocb->ki_res.res = mangle_poll(mask);
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 2059d1504149..40c4d6ba3fb9 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -143,10 +143,13 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode,
/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
- if (ret < 0)
+ if (ret < 0) {
btrfs_free_reserved_data_space_noquota(fs_info, len);
- else
+ extent_changeset_free(*reserved);
+ *reserved = NULL;
+ } else {
ret = 0;
+ }
return ret;
}
@@ -452,8 +455,11 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
if (ret < 0)
return ret;
ret = btrfs_delalloc_reserve_metadata(inode, len);
- if (ret < 0)
+ if (ret < 0) {
btrfs_free_reserved_data_space(inode, *reserved, start, len);
+ extent_changeset_free(*reserved);
+ *reserved = NULL;
+ }
return ret;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3fd736a02c1e..fc4895e6a62c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6051,6 +6051,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
int dev_ret = 0;
int ret = 0;
+ if (range->start == U64_MAX)
+ return -EINVAL;
+
/*
* Check range overflow if range->len is set.
* The default range->len is U64_MAX.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4e03a6d3aa32..3258b6f01e85 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4314,6 +4314,20 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
return;
/*
+ * A read may stumble upon this buffer later, make sure that it gets an
+ * error and knows there was an error.
+ */
+ clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+
+ /*
+ * We need to set the mapping with the io error as well because a write
+ * error will flip the file system readonly, and then syncfs() will
+ * return a 0 because we are readonly if we don't modify the err seq for
+ * the superblock.
+ */
+ mapping_set_error(page->mapping, -EIO);
+
+ /*
* If we error out, we should add back the dirty_metadata_bytes
* to make it consistent.
*/
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 92138ac2a4e2..2b84846ed934 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3187,10 +3187,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
return -EPERM;
vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args)) {
- ret = PTR_ERR(vol_args);
- goto out;
- }
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
ret = -EOPNOTSUPP;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 12ceb14a1141..d20166336557 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -334,7 +334,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
key.offset = ref_id;
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- BUG_ON(ret < 0);
+ if (ret < 0)
+ goto out;
if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8ab33caf016f..3e6f14e13918 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2908,6 +2908,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
path->nodes[*level]->len);
if (ret)
return ret;
+ btrfs_redirty_list_add(trans->transaction,
+ next);
} else {
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
clear_extent_buffer_dirty(next);
@@ -2988,6 +2990,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next->start, next->len);
if (ret)
goto out;
+ btrfs_redirty_list_add(trans->transaction, next);
} else {
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
clear_extent_buffer_dirty(next);
@@ -3438,8 +3441,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
extent_io_tree_release(&log->log_csum_range);
- if (trans && log->node)
- btrfs_redirty_list_add(trans->transaction, log->node);
btrfs_put_root(log);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 67d932d70798..678a29469511 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1860,6 +1860,7 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
block_group->alloc_offset = block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
btrfs_clear_treelog_bg(block_group);
+ btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock);
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
@@ -1942,6 +1943,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
ASSERT(block_group->alloc_offset == block_group->zone_capacity);
ASSERT(block_group->free_space_ctl->free_space == 0);
btrfs_clear_treelog_bg(block_group);
+ btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock);
map = block_group->physical_map;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6b705026da1a..18448dbd762a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1562,6 +1562,10 @@ smbd_connected:
/* fscache server cookies are based on primary channel only */
if (!CIFS_SERVER_IS_CHAN(tcp_ses))
cifs_fscache_get_client_cookie(tcp_ses);
+#ifdef CONFIG_CIFS_FSCACHE
+ else
+ tcp_ses->fscache = tcp_ses->primary_server->fscache;
+#endif /* CONFIG_CIFS_FSCACHE */
/* queue echo request delayed work */
queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
@@ -3046,12 +3050,6 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx)
cifs_dbg(VFS, "read only mount of RW share\n");
/* no need to log a RW mount of a typical RW share */
}
- /*
- * The cookie is initialized from volume info returned above.
- * Inside cifs_fscache_get_super_cookie it checks
- * that we do not get super cookie twice.
- */
- cifs_fscache_get_super_cookie(tcon);
}
/*
@@ -3426,6 +3424,7 @@ static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list
*/
mount_put_conns(mnt_ctx);
mount_get_dfs_conns(mnt_ctx);
+ set_root_ses(mnt_ctx);
full_path = build_unc_path_to_root(ctx, cifs_sb, true);
if (IS_ERR(full_path))
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 7e409a38a2d7..003c5f1f4dfb 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -16,14 +16,7 @@
* Key layout of CIFS server cache index object
*/
struct cifs_server_key {
- struct {
- uint16_t family; /* address family */
- __be16 port; /* IP port */
- } hdr;
- union {
- struct in_addr ipv4_addr;
- struct in6_addr ipv6_addr;
- };
+ __u64 conn_id;
} __packed;
/*
@@ -31,42 +24,23 @@ struct cifs_server_key {
*/
void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
{
- const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
- const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
- const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
struct cifs_server_key key;
- uint16_t key_len = sizeof(key.hdr);
-
- memset(&key, 0, sizeof(key));
/*
- * Should not be a problem as sin_family/sin6_family overlays
- * sa_family field
+ * Check if cookie was already initialized so don't reinitialize it.
+ * In the future, as we integrate with newer fscache features,
+ * we may want to instead add a check if cookie has changed
*/
- key.hdr.family = sa->sa_family;
- switch (sa->sa_family) {
- case AF_INET:
- key.hdr.port = addr->sin_port;
- key.ipv4_addr = addr->sin_addr;
- key_len += sizeof(key.ipv4_addr);
- break;
-
- case AF_INET6:
- key.hdr.port = addr6->sin6_port;
- key.ipv6_addr = addr6->sin6_addr;
- key_len += sizeof(key.ipv6_addr);
- break;
-
- default:
- cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
- server->fscache = NULL;
+ if (server->fscache)
return;
- }
+
+ memset(&key, 0, sizeof(key));
+ key.conn_id = server->conn_id;
server->fscache =
fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
&cifs_fscache_server_index_def,
- &key, key_len,
+ &key, sizeof(key),
NULL, 0,
server, 0, true);
cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
@@ -92,7 +66,7 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
* In the future, as we integrate with newer fscache features,
* we may want to instead add a check if cookie has changed
*/
- if (tcon->fscache == NULL)
+ if (tcon->fscache)
return;
sharename = extract_sharename(tcon->treeName);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 82848412ad85..96d083db1737 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1376,6 +1376,13 @@ iget_no_retry:
inode = ERR_PTR(rc);
}
+ /*
+ * The cookie is initialized from volume info returned above.
+ * Inside cifs_fscache_get_super_cookie it checks
+ * that we do not get super cookie twice.
+ */
+ cifs_fscache_get_super_cookie(tcon);
+
out:
kfree(path);
free_xid(xid);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index af63548eaf26..035dc3e245dc 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -590,8 +590,8 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
{
unsigned int tioffset; /* challenge message target info area */
unsigned int tilen; /* challenge message target info area length */
-
CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
+ __u32 server_flags;
if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
cifs_dbg(VFS, "challenge blob len %d too small\n", blob_len);
@@ -609,12 +609,37 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
return -EINVAL;
}
+ server_flags = le32_to_cpu(pblob->NegotiateFlags);
+ cifs_dbg(FYI, "%s: negotiate=0x%08x challenge=0x%08x\n", __func__,
+ ses->ntlmssp->client_flags, server_flags);
+
+ if ((ses->ntlmssp->client_flags & (NTLMSSP_NEGOTIATE_SEAL | NTLMSSP_NEGOTIATE_SIGN)) &&
+ (!(server_flags & NTLMSSP_NEGOTIATE_56) && !(server_flags & NTLMSSP_NEGOTIATE_128))) {
+ cifs_dbg(VFS, "%s: requested signing/encryption but server did not return either 56-bit or 128-bit session key size\n",
+ __func__);
+ return -EINVAL;
+ }
+ if (!(server_flags & NTLMSSP_NEGOTIATE_NTLM) && !(server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) {
+ cifs_dbg(VFS, "%s: server does not seem to support either NTLMv1 or NTLMv2\n", __func__);
+ return -EINVAL;
+ }
+ if (ses->server->sign && !(server_flags & NTLMSSP_NEGOTIATE_SIGN)) {
+ cifs_dbg(VFS, "%s: forced packet signing but server does not seem to support it\n",
+ __func__);
+ return -EOPNOTSUPP;
+ }
+ if ((ses->ntlmssp->client_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
+ !(server_flags & NTLMSSP_NEGOTIATE_KEY_XCH))
+ pr_warn_once("%s: authentication has been weakened as server does not support key exchange\n",
+ __func__);
+
+ ses->ntlmssp->server_flags = server_flags;
+
memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
- /* BB we could decode pblob->NegotiateFlags; some may be useful */
/* In particular we can examine sign flags */
/* BB spec says that if AvId field of MsvAvTimestamp is populated then
we must set the MIC field of the AUTHENTICATE_MESSAGE */
- ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
+
tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
if (tioffset > blob_len || tioffset + tilen > blob_len) {
@@ -721,13 +746,13 @@ int build_ntlmssp_negotiate_blob(unsigned char **pbuffer,
flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
- NTLMSSP_NEGOTIATE_SEAL;
- if (server->sign)
- flags |= NTLMSSP_NEGOTIATE_SIGN;
+ NTLMSSP_NEGOTIATE_ALWAYS_SIGN | NTLMSSP_NEGOTIATE_SEAL |
+ NTLMSSP_NEGOTIATE_SIGN;
if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
tmp = *pbuffer + sizeof(NEGOTIATE_MESSAGE);
+ ses->ntlmssp->client_flags = flags;
sec_blob->NegotiateFlags = cpu_to_le32(flags);
/* these fields should be null in negotiate phase MS-NLMP 3.1.5.1.1 */
@@ -779,15 +804,8 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
- flags = NTLMSSP_NEGOTIATE_56 |
- NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
- NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
- NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
- NTLMSSP_NEGOTIATE_SEAL | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED;
- if (ses->server->sign)
- flags |= NTLMSSP_NEGOTIATE_SIGN;
- if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
- flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+ flags = ses->ntlmssp->server_flags | NTLMSSP_REQUEST_TARGET |
+ NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED;
tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
sec_blob->NegotiateFlags = cpu_to_le32(flags);
@@ -834,9 +852,9 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
*pbuffer, &tmp,
nls_cp);
- if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) ||
- (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
- && !calc_seckey(ses)) {
+ if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
+ (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess) &&
+ !calc_seckey(ses)) {
memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
diff --git a/fs/file.c b/fs/file.c
index 8627dacfc424..97d212a9b814 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -841,24 +841,68 @@ void do_close_on_exec(struct files_struct *files)
spin_unlock(&files->file_lock);
}
+static inline struct file *__fget_files_rcu(struct files_struct *files,
+ unsigned int fd, fmode_t mask, unsigned int refs)
+{
+ for (;;) {
+ struct file *file;
+ struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+ struct file __rcu **fdentry;
+
+ if (unlikely(fd >= fdt->max_fds))
+ return NULL;
+
+ fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
+ file = rcu_dereference_raw(*fdentry);
+ if (unlikely(!file))
+ return NULL;
+
+ if (unlikely(file->f_mode & mask))
+ return NULL;
+
+ /*
+ * Ok, we have a file pointer. However, because we do
+ * this all locklessly under RCU, we may be racing with
+ * that file being closed.
+ *
+ * Such a race can take two forms:
+ *
+ * (a) the file ref already went down to zero,
+ * and get_file_rcu_many() fails. Just try
+ * again:
+ */
+ if (unlikely(!get_file_rcu_many(file, refs)))
+ continue;
+
+ /*
+ * (b) the file table entry has changed under us.
+ * Note that we don't need to re-check the 'fdt->fd'
+ * pointer having changed, because it always goes
+ * hand-in-hand with 'fdt'.
+ *
+ * If so, we need to put our refs and try again.
+ */
+ if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
+ unlikely(rcu_dereference_raw(*fdentry) != file)) {
+ fput_many(file, refs);
+ continue;
+ }
+
+ /*
+ * Ok, we have a ref to the file, and checked that it
+ * still exists.
+ */
+ return file;
+ }
+}
+
static struct file *__fget_files(struct files_struct *files, unsigned int fd,
fmode_t mask, unsigned int refs)
{
struct file *file;
rcu_read_lock();
-loop:
- file = files_lookup_fd_rcu(files, fd);
- if (file) {
- /* File object ref couldn't be taken.
- * dup2() atomicity guarantee is the reason
- * we loop to catch the new file (or NULL pointer)
- */
- if (file->f_mode & mask)
- file = NULL;
- else if (!get_file_rcu_many(file, refs))
- goto loop;
- }
+ file = __fget_files_rcu(files, fd, mask, refs);
rcu_read_unlock();
return file;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8dbd6fe66420..44a7a4288956 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1857,7 +1857,6 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
- struct gfs2_holder mock_gh = { .gh_gl = gl, .gh_state = state, };
unsigned long delay = 0;
unsigned long holdtime;
unsigned long now = jiffies;
@@ -1890,8 +1889,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
* keep the glock until the last strong holder is done with it.
*/
if (!find_first_strong_holder(gl)) {
- if (state == LM_ST_UNLOCKED)
- mock_gh.gh_state = LM_ST_EXCLUSIVE;
+ struct gfs2_holder mock_gh = {
+ .gh_gl = gl,
+ .gh_state = (state == LM_ST_UNLOCKED) ?
+ LM_ST_EXCLUSIVE : state,
+ .gh_iflags = BIT(HIF_HOLDER)
+ };
+
demote_incompat_holders(gl, &mock_gh);
}
handle_callback(gl, state, delay, true);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6424b903e885..89905f4f29bb 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -40,37 +40,6 @@ static const struct inode_operations gfs2_file_iops;
static const struct inode_operations gfs2_dir_iops;
static const struct inode_operations gfs2_symlink_iops;
-static int iget_test(struct inode *inode, void *opaque)
-{
- u64 no_addr = *(u64 *)opaque;
-
- return GFS2_I(inode)->i_no_addr == no_addr;
-}
-
-static int iget_set(struct inode *inode, void *opaque)
-{
- u64 no_addr = *(u64 *)opaque;
-
- GFS2_I(inode)->i_no_addr = no_addr;
- inode->i_ino = no_addr;
- return 0;
-}
-
-static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
-{
- struct inode *inode;
-
-repeat:
- inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr);
- if (!inode)
- return inode;
- if (is_bad_inode(inode)) {
- iput(inode);
- goto repeat;
- }
- return inode;
-}
-
/**
* gfs2_set_iop - Sets inode operations
* @inode: The inode with correct i_mode filled in
@@ -104,6 +73,22 @@ static void gfs2_set_iop(struct inode *inode)
}
}
+static int iget_test(struct inode *inode, void *opaque)
+{
+ u64 no_addr = *(u64 *)opaque;
+
+ return GFS2_I(inode)->i_no_addr == no_addr;
+}
+
+static int iget_set(struct inode *inode, void *opaque)
+{
+ u64 no_addr = *(u64 *)opaque;
+
+ GFS2_I(inode)->i_no_addr = no_addr;
+ inode->i_ino = no_addr;
+ return 0;
+}
+
/**
* gfs2_inode_lookup - Lookup an inode
* @sb: The super block
@@ -132,12 +117,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
{
struct inode *inode;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl = NULL;
struct gfs2_holder i_gh;
int error;
gfs2_holder_mark_uninitialized(&i_gh);
- inode = gfs2_iget(sb, no_addr);
+ inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -145,22 +129,16 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (inode->i_state & I_NEW) {
struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_glock *io_gl;
error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (unlikely(error))
goto fail;
- flush_delayed_work(&ip->i_gl->gl_work);
-
- error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
- if (unlikely(error))
- goto fail;
- if (blktype != GFS2_BLKST_UNLINKED)
- gfs2_cancel_delete_work(io_gl);
if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) {
/*
* The GL_SKIP flag indicates to skip reading the inode
- * block. We read the inode with gfs2_inode_refresh
+ * block. We read the inode when instantiating it
* after possibly checking the block type.
*/
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
@@ -181,24 +159,31 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
}
}
- glock_set_object(ip->i_gl, ip);
set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags);
- error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
+
+ error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (unlikely(error))
goto fail;
- glock_set_object(ip->i_iopen_gh.gh_gl, ip);
+ if (blktype != GFS2_BLKST_UNLINKED)
+ gfs2_cancel_delete_work(io_gl);
+ error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
gfs2_glock_put(io_gl);
- io_gl = NULL;
+ if (unlikely(error))
+ goto fail;
/* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */
inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1);
inode->i_atime.tv_nsec = 0;
+ glock_set_object(ip->i_gl, ip);
+
if (type == DT_UNKNOWN) {
/* Inode glock must be locked already */
error = gfs2_instantiate(&i_gh);
- if (error)
+ if (error) {
+ glock_clear_object(ip->i_gl, ip);
goto fail;
+ }
} else {
ip->i_no_formal_ino = no_formal_ino;
inode->i_mode = DT2IF(type);
@@ -206,31 +191,23 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
+ glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_set_iop(inode);
+ unlock_new_inode(inode);
}
if (no_formal_ino && ip->i_no_formal_ino &&
no_formal_ino != ip->i_no_formal_ino) {
- error = -ESTALE;
- if (inode->i_state & I_NEW)
- goto fail;
iput(inode);
- return ERR_PTR(error);
+ return ERR_PTR(-ESTALE);
}
- if (inode->i_state & I_NEW)
- unlock_new_inode(inode);
-
return inode;
fail:
- if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
- glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
+ if (gfs2_holder_initialized(&ip->i_iopen_gh))
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
- }
- if (io_gl)
- gfs2_glock_put(io_gl);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
iget_failed(inode);
@@ -730,18 +707,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (error)
goto fail_free_inode;
- flush_delayed_work(&ip->i_gl->gl_work);
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (error)
goto fail_free_inode;
gfs2_cancel_delete_work(io_gl);
+ error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
+ BUG_ON(error);
+
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
if (error)
goto fail_gunlock2;
- glock_set_object(ip->i_gl, ip);
error = gfs2_trans_begin(sdp, blocks, 0);
if (error)
goto fail_gunlock2;
@@ -757,9 +735,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock2;
+ glock_set_object(ip->i_gl, ip);
glock_set_object(io_gl, ip);
gfs2_set_iop(inode);
- insert_inode_hash(inode);
free_vfs_inode = 0; /* After this point, the inode is no longer
considered free. Any failures need to undo
@@ -801,17 +779,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
gfs2_glock_dq_uninit(ghs + 1);
gfs2_glock_put(io_gl);
gfs2_qa_put(dip);
+ unlock_new_inode(inode);
return error;
fail_gunlock3:
+ glock_clear_object(ip->i_gl, ip);
glock_clear_object(io_gl, ip);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_gunlock2:
- glock_clear_object(io_gl, ip);
gfs2_glock_put(io_gl);
fail_free_inode:
if (ip->i_gl) {
- glock_clear_object(ip->i_gl, ip);
if (free_vfs_inode) /* else evict will do the put for us */
gfs2_glock_put(ip->i_gl);
}
@@ -829,7 +807,10 @@ fail_gunlock:
mark_inode_dirty(inode);
set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED,
&GFS2_I(inode)->i_flags);
- iput(inode);
+ if (inode->i_state & I_NEW)
+ iget_failed(inode);
+ else
+ iput(inode);
}
if (gfs2_holder_initialized(ghs + 1))
gfs2_glock_dq_uninit(ghs + 1);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 88202de519f6..8d2bb818a3bb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -142,6 +142,7 @@ static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct,
struct io_cb_cancel_data *match);
static void create_worker_cb(struct callback_head *cb);
+static void io_wq_cancel_tw_create(struct io_wq *wq);
static bool io_worker_get(struct io_worker *worker)
{
@@ -357,12 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker,
test_and_set_bit_lock(0, &worker->create_state))
goto fail_release;
+ atomic_inc(&wq->worker_refs);
init_task_work(&worker->create_work, func);
worker->create_index = acct->index;
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
- clear_bit_unlock(0, &worker->create_state);
+ /*
+ * EXIT may have been set after checking it above, check after
+ * adding the task_work and remove any creation item if it is
+ * now set. wq exit does that too, but we can have added this
+ * work item after we canceled in io_wq_exit_workers().
+ */
+ if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
+ io_wq_cancel_tw_create(wq);
+ io_worker_ref_put(wq);
return true;
}
+ io_worker_ref_put(wq);
clear_bit_unlock(0, &worker->create_state);
fail_release:
io_worker_release(worker);
@@ -714,6 +725,13 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
static inline bool io_should_retry_thread(long err)
{
+ /*
+ * Prevent perpetual task_work retry, if the task (or its group) is
+ * exiting.
+ */
+ if (fatal_signal_pending(current))
+ return false;
+
switch (err) {
case -EAGAIN:
case -ERESTARTSYS:
@@ -1191,13 +1209,9 @@ void io_wq_exit_start(struct io_wq *wq)
set_bit(IO_WQ_BIT_EXIT, &wq->state);
}
-static void io_wq_exit_workers(struct io_wq *wq)
+static void io_wq_cancel_tw_create(struct io_wq *wq)
{
struct callback_head *cb;
- int node;
-
- if (!wq->task)
- return;
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
struct io_worker *worker;
@@ -1205,6 +1219,16 @@ static void io_wq_exit_workers(struct io_wq *wq)
worker = container_of(cb, struct io_worker, create_work);
io_worker_cancel_cb(worker);
}
+}
+
+static void io_wq_exit_workers(struct io_wq *wq)
+{
+ int node;
+
+ if (!wq->task)
+ return;
+
+ io_wq_cancel_tw_create(wq);
rcu_read_lock();
for_each_node(node) {
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c4f217613f56..d5ab0e9a3f29 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9824,7 +9824,7 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
/*
* Find any io_uring ctx that this task has registered or done IO on, and cancel
- * requests. @sqd should be not-null IIF it's an SQPOLL thread cancellation.
+ * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
*/
static __cold void io_uring_cancel_generic(bool cancel_all,
struct io_sq_data *sqd)
@@ -9866,8 +9866,10 @@ static __cold void io_uring_cancel_generic(bool cancel_all,
cancel_all);
}
- prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
+ io_run_task_work();
io_uring_drop_tctx_refs(current);
+
/*
* If we've seen completions, retry without waiting. This
* avoids a race where a completion comes in before we did
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 9320a42dfaf9..75c76cbb27cc 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -354,16 +354,11 @@ static void netfs_rreq_write_to_cache_work(struct work_struct *work)
netfs_rreq_do_write_to_cache(rreq);
}
-static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq,
- bool was_async)
+static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq)
{
- if (was_async) {
- rreq->work.func = netfs_rreq_write_to_cache_work;
- if (!queue_work(system_unbound_wq, &rreq->work))
- BUG();
- } else {
- netfs_rreq_do_write_to_cache(rreq);
- }
+ rreq->work.func = netfs_rreq_write_to_cache_work;
+ if (!queue_work(system_unbound_wq, &rreq->work))
+ BUG();
}
/*
@@ -558,7 +553,7 @@ again:
wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags))
- return netfs_rreq_write_to_cache(rreq, was_async);
+ return netfs_rreq_write_to_cache(rreq);
netfs_rreq_completed(rreq, was_async);
}
@@ -960,7 +955,7 @@ int netfs_readpage(struct file *file,
rreq = netfs_alloc_read_request(ops, netfs_priv, file);
if (!rreq) {
if (netfs_priv)
- ops->cleanup(netfs_priv, folio_file_mapping(folio));
+ ops->cleanup(folio_file_mapping(folio), netfs_priv);
folio_unlock(folio);
return -ENOMEM;
}
@@ -1008,8 +1003,8 @@ out:
}
EXPORT_SYMBOL(netfs_readpage);
-/**
- * netfs_skip_folio_read - prep a folio for writing without reading first
+/*
+ * Prepare a folio for writing without reading first
* @folio: The folio being prepared
* @pos: starting position for the write
* @len: length of write
@@ -1191,7 +1186,7 @@ have_folio:
goto error;
have_folio_no_wait:
if (netfs_priv)
- ops->cleanup(netfs_priv, mapping);
+ ops->cleanup(mapping, netfs_priv);
*_folio = folio;
_leave(" = 0");
return 0;
@@ -1202,7 +1197,7 @@ error:
folio_unlock(folio);
folio_put(folio);
if (netfs_priv)
- ops->cleanup(netfs_priv, mapping);
+ ops->cleanup(mapping, netfs_priv);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 6fedc49726bf..c634483d85d2 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -2156,6 +2156,7 @@ static struct notifier_block nfsd4_cld_block = {
int
register_cld_notifier(void)
{
+ WARN_ON(!nfsd_net_id);
return rpc_pipefs_notifier_register(&nfsd4_cld_block);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bfad94c70b84..1956d377d1a6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1207,6 +1207,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
return 0;
}
+static bool delegation_hashed(struct nfs4_delegation *dp)
+{
+ return !(list_empty(&dp->dl_perfile));
+}
+
static bool
unhash_delegation_locked(struct nfs4_delegation *dp)
{
@@ -1214,7 +1219,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
lockdep_assert_held(&state_lock);
- if (list_empty(&dp->dl_perfile))
+ if (!delegation_hashed(dp))
return false;
dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
@@ -4598,7 +4603,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
* queued for a lease break. Don't queue it again.
*/
spin_lock(&state_lock);
- if (dp->dl_time == 0) {
+ if (delegation_hashed(dp) && dp->dl_time == 0) {
dp->dl_time = ktime_get_boottime_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index af8531c3854a..51a49e0cfe37 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1521,12 +1521,9 @@ static int __init init_nfsd(void)
int retval;
printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
- retval = register_cld_notifier();
- if (retval)
- return retval;
retval = nfsd4_init_slabs();
if (retval)
- goto out_unregister_notifier;
+ return retval;
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
@@ -1545,9 +1542,14 @@ static int __init init_nfsd(void)
goto out_free_exports;
retval = register_pernet_subsys(&nfsd_net_ops);
if (retval < 0)
+ goto out_free_filesystem;
+ retval = register_cld_notifier();
+ if (retval)
goto out_free_all;
return 0;
out_free_all:
+ unregister_pernet_subsys(&nfsd_net_ops);
+out_free_filesystem:
unregister_filesystem(&nfsd_fs_type);
out_free_exports:
remove_proc_entry("fs/nfs/exports", NULL);
@@ -1561,13 +1563,12 @@ out_free_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
nfsd4_free_slabs();
-out_unregister_notifier:
- unregister_cld_notifier();
return retval;
}
static void __exit exit_nfsd(void)
{
+ unregister_cld_notifier();
unregister_pernet_subsys(&nfsd_net_ops);
nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL);
@@ -1577,7 +1578,6 @@ static void __exit exit_nfsd(void)
nfsd4_free_slabs();
nfsd4_exit_pnfs();
unregister_filesystem(&nfsd_fs_type);
- unregister_cld_notifier();
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 040e1cf90528..65ce0e72e7b9 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -35,17 +35,7 @@
void signalfd_cleanup(struct sighand_struct *sighand)
{
- wait_queue_head_t *wqh = &sighand->signalfd_wqh;
- /*
- * The lockless check can race with remove_wait_queue() in progress,
- * but in this case its caller should run under rcu_read_lock() and
- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
- */
- if (likely(!waitqueue_active(wqh)))
- return;
-
- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
- wake_up_poll(wqh, EPOLLHUP | POLLFREE);
+ wake_up_pollfree(&sighand->signalfd_wqh);
}
struct signalfd_ctx {
diff --git a/fs/smbfs_common/cifs_arc4.c b/fs/smbfs_common/cifs_arc4.c
index 85ba15a60b13..043e4cb839fa 100644
--- a/fs/smbfs_common/cifs_arc4.c
+++ b/fs/smbfs_common/cifs_arc4.c
@@ -72,16 +72,3 @@ void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int l
ctx->y = y;
}
EXPORT_SYMBOL_GPL(cifs_arc4_crypt);
-
-static int __init
-init_smbfs_common(void)
-{
- return 0;
-}
-static void __init
-exit_smbfs_common(void)
-{
-}
-
-module_init(init_smbfs_common)
-module_exit(exit_smbfs_common)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 925a621b432e..3616839c5c4b 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -161,6 +161,77 @@ struct tracefs_fs_info {
struct tracefs_mount_opts mount_opts;
};
+static void change_gid(struct dentry *dentry, kgid_t gid)
+{
+ if (!dentry->d_inode)
+ return;
+ dentry->d_inode->i_gid = gid;
+}
+
+/*
+ * Taken from d_walk, but without he need for handling renames.
+ * Nothing can be renamed while walking the list, as tracefs
+ * does not support renames. This is only called when mounting
+ * or remounting the file system, to set all the files to
+ * the given gid.
+ */
+static void set_gid(struct dentry *parent, kgid_t gid)
+{
+ struct dentry *this_parent;
+ struct list_head *next;
+
+ this_parent = parent;
+ spin_lock(&this_parent->d_lock);
+
+ change_gid(this_parent, gid);
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+
+ change_gid(dentry, gid);
+
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, _RET_IP_);
+ this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+ goto repeat;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ rcu_read_lock();
+ascend:
+ if (this_parent != parent) {
+ struct dentry *child = this_parent;
+ this_parent = child->d_parent;
+
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /* go into the first sibling still alive */
+ do {
+ next = child->d_child.next;
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+ rcu_read_unlock();
+ goto resume;
+ }
+ rcu_read_unlock();
+ spin_unlock(&this_parent->d_lock);
+ return;
+}
+
static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
{
substring_t args[MAX_OPT_ARGS];
@@ -193,6 +264,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
if (!gid_valid(gid))
return -EINVAL;
opts->gid = gid;
+ set_gid(tracefs_mount->mnt_root, gid);
break;
case Opt_mode:
if (match_octal(&args[0], &option))
@@ -414,6 +486,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_mode = mode;
inode->i_fop = fops ? fops : &tracefs_file_operations;
inode->i_private = data;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
d_instantiate(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);
return end_creating(dentry);
@@ -436,6 +510,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP;
inode->i_op = ops;
inode->i_fop = &simple_dir_operations;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 64b9bf334806..6771f357ad2c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3122,7 +3122,6 @@ xfs_rename(
* appropriately.
*/
if (flags & RENAME_WHITEOUT) {
- ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
if (error)
return error;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e21459f9923a..778b57b1f020 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1765,7 +1765,10 @@ static int
xfs_remount_ro(
struct xfs_mount *mp)
{
- int error;
+ struct xfs_icwalk icw = {
+ .icw_flags = XFS_ICWALK_FLAG_SYNC,
+ };
+ int error;
/*
* Cancel background eofb scanning so it cannot race with the final
@@ -1773,8 +1776,13 @@ xfs_remount_ro(
*/
xfs_blockgc_stop(mp);
- /* Get rid of any leftover CoW reservations... */
- error = xfs_blockgc_free_space(mp, NULL);
+ /*
+ * Clear out all remaining COW staging extents and speculative post-EOF
+ * preallocations so that we don't leave inodes requiring inactivation
+ * cleanups during reclaim on a read-only mount. We must process every
+ * cached inode, so this requires a synchronous cache scan.
+ */
+ error = xfs_blockgc_free_space(mp, &icw);
if (error) {
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
return error;