summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/addr_list.c2
-rw-r--r--fs/afs/cmservice.c14
-rw-r--r--fs/afs/internal.h14
-rw-r--r--fs/afs/rxrpc.c74
-rw-r--r--fs/btrfs/block-group.c4
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/smb2ops.c4
-rw-r--r--fs/eventpoll.c8
-rw-r--r--fs/file.c7
-rw-r--r--fs/fuse/dev.c6
-rw-r--r--fs/fuse/fuse_i.h2
-rw-r--r--fs/inode.c1
-rw-r--r--fs/io_uring.c47
-rw-r--r--fs/locks.c54
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfs/fs_context.c9
-rw-r--r--fs/nfs/fscache.c2
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs4client.c1
-rw-r--r--fs/overlayfs/Kconfig1
-rw-r--r--fs/overlayfs/file.c6
-rw-r--r--fs/overlayfs/overlayfs.h7
-rw-r--r--fs/overlayfs/super.c9
-rw-r--r--fs/overlayfs/util.c4
-rw-r--r--fs/zonefs/super.c28
27 files changed, 196 insertions, 120 deletions
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index df415c05939e..de1ae0bead3b 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -19,7 +19,7 @@
void afs_put_addrlist(struct afs_addr_list *alist)
{
if (alist && refcount_dec_and_test(&alist->usage))
- call_rcu(&alist->rcu, (rcu_callback_t)kfree);
+ kfree_rcu(alist, rcu);
}
/*
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index ff3994a6be23..6765949b3aab 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -244,6 +244,17 @@ static void afs_cm_destructor(struct afs_call *call)
}
/*
+ * Abort a service call from within an action function.
+ */
+static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error,
+ const char *why)
+{
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, error, why);
+ afs_set_call_complete(call, error, 0);
+}
+
+/*
* The server supplied a list of callbacks that it wanted to break.
*/
static void SRXAFSCB_CallBack(struct work_struct *work)
@@ -510,8 +521,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
afs_send_empty_reply(call);
else
- rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- 1, 1, "K-1");
+ afs_abort_service_call(call, 1, 1, "K-1");
afs_put_call(call);
_leave("");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1d81fc4c3058..ef732dd4e7ef 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -81,7 +81,7 @@ enum afs_call_state {
* List of server addresses.
*/
struct afs_addr_list {
- struct rcu_head rcu; /* Must be first */
+ struct rcu_head rcu;
refcount_t usage;
u32 version; /* Version */
unsigned char max_addrs;
@@ -154,7 +154,7 @@ struct afs_call {
};
unsigned char unmarshall; /* unmarshalling phase */
unsigned char addr_ix; /* Address in ->alist */
- bool incoming; /* T if incoming call */
+ bool drop_ref; /* T if need to drop ref for incoming call */
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
@@ -1209,8 +1209,16 @@ static inline void afs_set_call_complete(struct afs_call *call,
ok = true;
}
spin_unlock_bh(&call->state_lock);
- if (ok)
+ if (ok) {
trace_afs_call_done(call);
+
+ /* Asynchronous calls have two refs to release - one from the alloc and
+ * one queued with the work item - and we can't just deallocate the
+ * call because the work item may be queued again.
+ */
+ if (call->drop_ref)
+ afs_put_call(call);
+ }
}
/*
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 58d396592250..1ecc67da6c1a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -18,7 +18,6 @@ struct workqueue_struct *afs_async_calls;
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
-static void afs_delete_async_call(struct work_struct *);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
@@ -169,7 +168,7 @@ void afs_put_call(struct afs_call *call)
int n = atomic_dec_return(&call->usage);
int o = atomic_read(&net->nr_outstanding_calls);
- trace_afs_call(call, afs_call_trace_put, n + 1, o,
+ trace_afs_call(call, afs_call_trace_put, n, o,
__builtin_return_address(0));
ASSERTCMP(n, >=, 0);
@@ -402,8 +401,10 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
/* If the call is going to be asynchronous, we need an extra ref for
* the call to hold itself so the caller need not hang on to its ref.
*/
- if (call->async)
+ if (call->async) {
afs_get_call(call, afs_call_trace_get);
+ call->drop_ref = true;
+ }
/* create a call */
rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
@@ -413,7 +414,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
afs_wake_up_async_call :
afs_wake_up_call_waiter),
call->upgrade,
- call->intr,
+ (call->intr ? RXRPC_PREINTERRUPTIBLE :
+ RXRPC_UNINTERRUPTIBLE),
call->debug_id);
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
@@ -584,8 +586,6 @@ static void afs_deliver_to_call(struct afs_call *call)
done:
if (call->type->done)
call->type->done(call);
- if (state == AFS_CALL_COMPLETE && call->incoming)
- afs_put_call(call);
out:
_leave("");
return;
@@ -604,11 +604,7 @@ call_complete:
long afs_wait_for_call_to_complete(struct afs_call *call,
struct afs_addr_cursor *ac)
{
- signed long rtt2, timeout;
long ret;
- bool stalled = false;
- u64 rtt;
- u32 life, last_life;
bool rxrpc_complete = false;
DECLARE_WAITQUEUE(myself, current);
@@ -619,14 +615,6 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
if (ret < 0)
goto out;
- rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
- rtt2 = nsecs_to_jiffies64(rtt) * 2;
- if (rtt2 < 2)
- rtt2 = 2;
-
- timeout = rtt2;
- rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life);
-
add_wait_queue(&call->waitq, &myself);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
@@ -637,37 +625,19 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
call->need_attention = false;
__set_current_state(TASK_RUNNING);
afs_deliver_to_call(call);
- timeout = rtt2;
continue;
}
if (afs_check_call_state(call, AFS_CALL_COMPLETE))
break;
- if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) {
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) {
/* rxrpc terminated the call. */
rxrpc_complete = true;
break;
}
- if (call->intr && timeout == 0 &&
- life == last_life && signal_pending(current)) {
- if (stalled)
- break;
- __set_current_state(TASK_RUNNING);
- rxrpc_kernel_probe_life(call->net->socket, call->rxcall);
- timeout = rtt2;
- stalled = true;
- continue;
- }
-
- if (life != last_life) {
- timeout = rtt2;
- last_life = life;
- stalled = false;
- }
-
- timeout = schedule_timeout(timeout);
+ schedule();
}
remove_wait_queue(&call->waitq, &myself);
@@ -735,7 +705,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
u = atomic_fetch_add_unless(&call->usage, 1, 0);
if (u != 0) {
- trace_afs_call(call, afs_call_trace_wake, u,
+ trace_afs_call(call, afs_call_trace_wake, u + 1,
atomic_read(&call->net->nr_outstanding_calls),
__builtin_return_address(0));
@@ -745,21 +715,6 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
}
/*
- * Delete an asynchronous call. The work item carries a ref to the call struct
- * that we need to release.
- */
-static void afs_delete_async_call(struct work_struct *work)
-{
- struct afs_call *call = container_of(work, struct afs_call, async_work);
-
- _enter("");
-
- afs_put_call(call);
-
- _leave("");
-}
-
-/*
* Perform I/O processing on an asynchronous call. The work item carries a ref
* to the call struct that we either need to release or to pass on.
*/
@@ -774,16 +729,6 @@ static void afs_process_async_call(struct work_struct *work)
afs_deliver_to_call(call);
}
- if (call->state == AFS_CALL_COMPLETE) {
- /* We have two refs to release - one from the alloc and one
- * queued with the work item - and we can't just deallocate the
- * call because the work item may be queued again.
- */
- call->async_work.func = afs_delete_async_call;
- if (!queue_work(afs_async_calls, &call->async_work))
- afs_put_call(call);
- }
-
afs_put_call(call);
_leave("");
}
@@ -810,6 +755,7 @@ void afs_charge_preallocation(struct work_struct *work)
if (!call)
break;
+ call->drop_ref = true;
call->async = true;
call->state = AFS_CALL_SV_AWAIT_OP_ID;
init_waitqueue_head(&call->waitq);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 404e050ce8ee..7f09147872dc 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -856,9 +856,9 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
found_raid1c34 = true;
up_read(&sinfo->groups_sem);
}
- if (found_raid56)
+ if (!found_raid56)
btrfs_clear_fs_incompat(fs_info, RAID56);
- if (found_raid1c34)
+ if (!found_raid1c34)
btrfs_clear_fs_incompat(fs_info, RAID1C34);
}
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 27076ebadb36..d267eb5caa7b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9496,6 +9496,10 @@ out_fail:
ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
if (ret)
commit_transaction = true;
+ } else if (sync_log) {
+ mutex_lock(&root->log_mutex);
+ list_del(&ctx.list);
+ mutex_unlock(&root->log_mutex);
}
if (commit_transaction) {
ret = btrfs_commit_transaction(trans);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3b942ecdd4be..8f9d849a0012 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1169,7 +1169,8 @@ try_again:
rc = posix_lock_file(file, flock, NULL);
up_write(&cinode->lock_sem);
if (rc == FILE_LOCK_DEFERRED) {
- rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
+ rc = wait_event_interruptible(flock->fl_wait,
+ list_empty(&flock->fl_blocked_member));
if (!rc)
goto try_again;
locks_delete_block(flock);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1e8a4b1579db..b16f8d23e97b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2191,7 +2191,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
stat->gid = current_fsgid();
}
- return rc;
+ return 0;
}
int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start,
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c31e84ee3c39..cfe9b800ea8c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2222,6 +2222,8 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
goto qdf_free;
}
+ atomic_inc(&tcon->num_remote_opens);
+
qd_rsp = (struct smb2_query_directory_rsp *)rsp_iov[1].iov_base;
if (qd_rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
trace_smb3_query_dir_done(xid, fid->persistent_fid,
@@ -3417,7 +3419,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon,
if (rc)
goto out;
- if (out_data_len < sizeof(struct file_allocated_range_buffer)) {
+ if (out_data_len && out_data_len < sizeof(struct file_allocated_range_buffer)) {
rc = -EINVAL;
goto out;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b041b66002db..eee3c92a9ebf 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1854,9 +1854,9 @@ fetch_events:
waiter = true;
init_waitqueue_entry(&wait, current);
- spin_lock_irq(&ep->wq.lock);
+ write_lock_irq(&ep->lock);
__add_wait_queue_exclusive(&ep->wq, &wait);
- spin_unlock_irq(&ep->wq.lock);
+ write_unlock_irq(&ep->lock);
}
for (;;) {
@@ -1904,9 +1904,9 @@ send_events:
goto fetch_events;
if (waiter) {
- spin_lock_irq(&ep->wq.lock);
+ write_lock_irq(&ep->lock);
__remove_wait_queue(&ep->wq, &wait);
- spin_unlock_irq(&ep->wq.lock);
+ write_unlock_irq(&ep->lock);
}
return res;
diff --git a/fs/file.c b/fs/file.c
index a364e1a9b7e8..c8a4e4c86e55 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -540,9 +540,14 @@ static int alloc_fd(unsigned start, unsigned flags)
return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
}
+int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
+{
+ return __alloc_fd(current->files, 0, nofile, flags);
+}
+
int get_unused_fd_flags(unsigned flags)
{
- return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+ return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
}
EXPORT_SYMBOL(get_unused_fd_flags);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8e02d76fe104..97eec7522bf2 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -276,12 +276,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
{
struct fuse_iqueue *fiq = &fc->iq;
- bool async;
if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request;
- async = req->args->end;
/*
* test_and_set_bit() implies smp_mb() between bit
* changing and below intr_entry check. Pairs with
@@ -324,7 +322,7 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
wake_up(&req->waitq);
}
- if (async)
+ if (test_bit(FR_ASYNC, &req->flags))
req->args->end(fc, req->args, req->out.h.error);
put_request:
fuse_put_request(fc, req);
@@ -471,6 +469,8 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
req->in.h.opcode = args->opcode;
req->in.h.nodeid = args->nodeid;
req->args = args;
+ if (args->end)
+ __set_bit(FR_ASYNC, &req->flags);
}
ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aa75e2305b75..ca344bf71404 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -301,6 +301,7 @@ struct fuse_io_priv {
* FR_SENT: request is in userspace, waiting for an answer
* FR_FINISHED: request is finished
* FR_PRIVATE: request is on private list
+ * FR_ASYNC: request is asynchronous
*/
enum fuse_req_flag {
FR_ISREPLY,
@@ -314,6 +315,7 @@ enum fuse_req_flag {
FR_SENT,
FR_FINISHED,
FR_PRIVATE,
+ FR_ASYNC,
};
/**
diff --git a/fs/inode.c b/fs/inode.c
index 7d57068b6b7a..93d9252a00ab 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -138,6 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
+ atomic64_set(&inode->i_sequence, 0);
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c06082bb039a..3affd96a98ba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -191,7 +191,6 @@ struct fixed_file_data {
struct llist_head put_llist;
struct work_struct ref_work;
struct completion done;
- struct rcu_head rcu;
};
struct io_ring_ctx {
@@ -344,6 +343,7 @@ struct io_accept {
struct sockaddr __user *addr;
int __user *addr_len;
int flags;
+ unsigned long nofile;
};
struct io_sync {
@@ -398,6 +398,7 @@ struct io_open {
struct filename *filename;
struct statx __user *buffer;
struct open_how how;
+ unsigned long nofile;
};
struct io_files_update {
@@ -2578,6 +2579,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
+ req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
@@ -2619,6 +2621,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
+ req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
@@ -2637,7 +2640,7 @@ static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret)
goto err;
- ret = get_unused_fd_flags(req->open.how.flags);
+ ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
if (ret < 0)
goto err;
@@ -3322,6 +3325,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
+ accept->nofile = rlimit(RLIMIT_NOFILE);
return 0;
#else
return -EOPNOTSUPP;
@@ -3338,7 +3342,8 @@ static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
file_flags = force_nonblock ? O_NONBLOCK : 0;
ret = __sys_accept4_file(req->file, file_flags, accept->addr,
- accept->addr_len, accept->flags);
+ accept->addr_len, accept->flags,
+ accept->nofile);
if (ret == -EAGAIN && force_nonblock)
return -EAGAIN;
if (ret == -ERESTARTSYS)
@@ -4132,6 +4137,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
{
ssize_t ret = 0;
+ if (!sqe)
+ return 0;
+
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (unlikely(ret))
@@ -4908,6 +4916,11 @@ err_req:
if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
INIT_LIST_HEAD(&req->link_list);
+
+ if (io_alloc_async_ctx(req)) {
+ ret = -EAGAIN;
+ goto err_req;
+ }
ret = io_req_defer_prep(req, sqe);
if (ret)
req->flags |= REQ_F_FAIL_LINK;
@@ -5331,24 +5344,21 @@ static void io_file_ref_kill(struct percpu_ref *ref)
complete(&data->done);
}
-static void __io_file_ref_exit_and_free(struct rcu_head *rcu)
+static void io_file_ref_exit_and_free(struct work_struct *work)
{
- struct fixed_file_data *data = container_of(rcu, struct fixed_file_data,
- rcu);
- percpu_ref_exit(&data->refs);
- kfree(data);
-}
+ struct fixed_file_data *data;
+
+ data = container_of(work, struct fixed_file_data, ref_work);
-static void io_file_ref_exit_and_free(struct rcu_head *rcu)
-{
/*
- * We need to order our exit+free call against the potentially
- * existing call_rcu() for switching to atomic. One way to do that
- * is to have this rcu callback queue the final put and free, as we
- * could otherwise have a pre-existing atomic switch complete _after_
- * the free callback we queued.
+ * Ensure any percpu-ref atomic switch callback has run, it could have
+ * been in progress when the files were being unregistered. Once
+ * that's done, we can safely exit and free the ref and containing
+ * data structure.
*/
- call_rcu(rcu, __io_file_ref_exit_and_free);
+ rcu_barrier();
+ percpu_ref_exit(&data->refs);
+ kfree(data);
}
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -5369,7 +5379,8 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
for (i = 0; i < nr_tables; i++)
kfree(data->table[i].files);
kfree(data->table);
- call_rcu(&data->rcu, io_file_ref_exit_and_free);
+ INIT_WORK(&data->ref_work, io_file_ref_exit_and_free);
+ queue_work(system_wq, &data->ref_work);
ctx->file_data = NULL;
ctx->nr_user_files = 0;
return 0;
diff --git a/fs/locks.c b/fs/locks.c
index 426b55d333d5..b8a31c1c4fff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -725,7 +725,6 @@ static void __locks_delete_block(struct file_lock *waiter)
{
locks_delete_global_blocked(waiter);
list_del_init(&waiter->fl_blocked_member);
- waiter->fl_blocker = NULL;
}
static void __locks_wake_up_blocks(struct file_lock *blocker)
@@ -740,6 +739,13 @@ static void __locks_wake_up_blocks(struct file_lock *blocker)
waiter->fl_lmops->lm_notify(waiter);
else
wake_up(&waiter->fl_wait);
+
+ /*
+ * The setting of fl_blocker to NULL marks the "done"
+ * point in deleting a block. Paired with acquire at the top
+ * of locks_delete_block().
+ */
+ smp_store_release(&waiter->fl_blocker, NULL);
}
}
@@ -753,11 +759,42 @@ int locks_delete_block(struct file_lock *waiter)
{
int status = -ENOENT;
+ /*
+ * If fl_blocker is NULL, it won't be set again as this thread "owns"
+ * the lock and is the only one that might try to claim the lock.
+ *
+ * We use acquire/release to manage fl_blocker so that we can
+ * optimize away taking the blocked_lock_lock in many cases.
+ *
+ * The smp_load_acquire guarantees two things:
+ *
+ * 1/ that fl_blocked_requests can be tested locklessly. If something
+ * was recently added to that list it must have been in a locked region
+ * *before* the locked region when fl_blocker was set to NULL.
+ *
+ * 2/ that no other thread is accessing 'waiter', so it is safe to free
+ * it. __locks_wake_up_blocks is careful not to touch waiter after
+ * fl_blocker is released.
+ *
+ * If a lockless check of fl_blocker shows it to be NULL, we know that
+ * no new locks can be inserted into its fl_blocked_requests list, and
+ * can avoid doing anything further if the list is empty.
+ */
+ if (!smp_load_acquire(&waiter->fl_blocker) &&
+ list_empty(&waiter->fl_blocked_requests))
+ return status;
+
spin_lock(&blocked_lock_lock);
if (waiter->fl_blocker)
status = 0;
__locks_wake_up_blocks(waiter);
__locks_delete_block(waiter);
+
+ /*
+ * The setting of fl_blocker to NULL marks the "done" point in deleting
+ * a block. Paired with acquire at the top of this function.
+ */
+ smp_store_release(&waiter->fl_blocker, NULL);
spin_unlock(&blocked_lock_lock);
return status;
}
@@ -1350,7 +1387,8 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
@@ -1435,7 +1473,8 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker);
+ error = wait_event_interruptible(fl.fl_wait,
+ list_empty(&fl.fl_blocked_member));
if (!error) {
/*
* If we've been sleeping someone might have
@@ -1638,7 +1677,8 @@ restart:
locks_dispose_list(&dispose);
error = wait_event_interruptible_timeout(new_fl->fl_wait,
- !new_fl->fl_blocker, break_time);
+ list_empty(&new_fl->fl_blocked_member),
+ break_time);
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
@@ -2122,7 +2162,8 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
error = flock_lock_inode(inode, fl);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
@@ -2399,7 +2440,8 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
error = vfs_lock_file(filp, cmd, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
- error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
+ error = wait_event_interruptible(fl->fl_wait,
+ list_empty(&fl->fl_blocked_member));
if (error)
break;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 989c30c98511..f1ff3076e4a4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -153,6 +153,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
goto error_0;
+ clp->cl_minorversion = cl_init->minorversion;
clp->cl_nfs_mod = cl_init->nfs_mod;
if (!try_module_get(clp->cl_nfs_mod->owner))
goto error_dealloc;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index e1b938457ab9..e113fcb4bb4c 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -832,6 +832,8 @@ static int nfs_parse_source(struct fs_context *fc,
if (len > maxnamlen)
goto out_hostname;
+ kfree(ctx->nfs_server.hostname);
+
/* N.B. caller will free nfs_server.hostname in all cases */
ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL);
if (!ctx->nfs_server.hostname)
@@ -1240,6 +1242,13 @@ static int nfs_fs_context_validate(struct fs_context *fc)
}
ctx->nfs_mod = nfs_mod;
}
+
+ /* Ensure the filesystem context has the correct fs_type */
+ if (fc->fs_type != ctx->nfs_mod->nfs_fs) {
+ module_put(fc->fs_type->owner);
+ __module_get(ctx->nfs_mod->nfs_fs->owner);
+ fc->fs_type = ctx->nfs_mod->nfs_fs;
+ }
return 0;
out_no_device_name:
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 52270bfac120..1abf126c2df4 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
struct nfs_server_key {
struct {
uint16_t nfsversion; /* NFS protocol version */
+ uint32_t minorversion; /* NFSv4 minor version */
uint16_t family; /* address family */
__be16 port; /* IP port */
} hdr;
@@ -55,6 +56,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp)
memset(&key, 0, sizeof(key));
key.hdr.nfsversion = clp->rpc_ops->version;
+ key.hdr.minorversion = clp->cl_minorversion;
key.hdr.family = clp->cl_addr.ss_family;
switch (clp->cl_addr.ss_family) {
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index ad6077404947..f3ece8ed3203 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -153,7 +153,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
/* Open a new filesystem context, transferring parameters from the
* parent superblock, including the network namespace.
*/
- fc = fs_context_for_submount(&nfs_fs_type, path->dentry);
+ fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
if (IS_ERR(fc))
return ERR_CAST(fc);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0cd767e5c977..0bd77cc1f639 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -216,7 +216,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
INIT_LIST_HEAD(&clp->cl_ds_clients);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
- clp->cl_minorversion = cl_init->minorversion;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
clp->cl_mig_gen = 1;
#if IS_ENABLED(CONFIG_NFS_V4_1)
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 444e2da4f60e..714c14c47ca5 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -93,6 +93,7 @@ config OVERLAY_FS_XINO_AUTO
bool "Overlayfs: auto enable inode number mapping"
default n
depends on OVERLAY_FS
+ depends on 64BIT
help
If this config option is enabled then overlay filesystems will use
unused high bits in undelying filesystem inode numbers to map all
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index a5317216de73..87c362f65448 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -244,6 +244,9 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
if (iocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(orig_iocb->ki_filp);
+ /* Actually acquired in ovl_write_iter() */
+ __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+ SB_FREEZE_WRITE);
file_end_write(iocb->ki_filp);
ovl_copyattr(ovl_inode_real(inode), inode);
}
@@ -346,6 +349,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out;
file_start_write(real.file);
+ /* Pacify lockdep, same trick as done in aio_write() */
+ __sb_writers_release(file_inode(real.file)->i_sb,
+ SB_FREEZE_WRITE);
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 3623d28aa4fa..3d3f2b8bdae5 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -318,7 +318,12 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0;
}
-static inline int ovl_inode_lock(struct inode *inode)
+static inline void ovl_inode_lock(struct inode *inode)
+{
+ mutex_lock(&OVL_I(inode)->lock);
+}
+
+static inline int ovl_inode_lock_interruptible(struct inode *inode)
{
return mutex_lock_interruptible(&OVL_I(inode)->lock);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 319fe0d355b0..ac967f1cb6e5 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1411,6 +1411,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
if (ofs->config.xino == OVL_XINO_ON)
pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
ofs->xino_mode = 0;
+ } else if (ofs->config.xino == OVL_XINO_OFF) {
+ ofs->xino_mode = -1;
} else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
/*
* This is a roundup of number of bits needed for encoding
@@ -1623,8 +1625,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_stack_depth = 0;
sb->s_maxbytes = MAX_LFS_FILESIZE;
/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
- if (ofs->config.xino != OVL_XINO_OFF)
+ if (ofs->config.xino != OVL_XINO_OFF) {
ofs->xino_mode = BITS_PER_LONG - 32;
+ if (!ofs->xino_mode) {
+ pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
+ ofs->config.xino = OVL_XINO_OFF;
+ }
+ }
/* alloc/destroy_inode needed for setting up traps in inode cache */
sb->s_op = &ovl_super_operations;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index ea005085803f..042f7eb4f7f4 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -509,7 +509,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags)
struct inode *inode = d_inode(dentry);
int err;
- err = ovl_inode_lock(inode);
+ err = ovl_inode_lock_interruptible(inode);
if (!err && ovl_already_copied_up_locked(dentry, flags)) {
err = 1; /* Already copied up */
ovl_inode_unlock(inode);
@@ -764,7 +764,7 @@ int ovl_nlink_start(struct dentry *dentry)
return err;
}
- err = ovl_inode_lock(inode);
+ err = ovl_inode_lock_interruptible(inode);
if (err)
return err;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 69aee3dfb660..3ce9829a6936 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -178,7 +178,8 @@ static void zonefs_update_stats(struct inode *inode, loff_t new_isize)
* amount of readable data in the zone.
*/
static loff_t zonefs_check_zone_condition(struct inode *inode,
- struct blk_zone *zone, bool warn)
+ struct blk_zone *zone, bool warn,
+ bool mount)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
@@ -196,13 +197,26 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
zone->wp = zone->start;
return 0;
case BLK_ZONE_COND_READONLY:
- /* Do not allow writes in read-only zones */
+ /*
+ * The write pointer of read-only zones is invalid. If such a
+ * zone is found during mount, the file size cannot be retrieved
+ * so we treat the zone as offline (mount == true case).
+ * Otherwise, keep the file size as it was when last updated
+ * so that the user can recover data. In both cases, writes are
+ * always disabled for the zone.
+ */
if (warn)
zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n",
inode->i_ino);
inode->i_flags |= S_IMMUTABLE;
+ if (mount) {
+ zone->cond = BLK_ZONE_COND_OFFLINE;
+ inode->i_mode &= ~0777;
+ zone->wp = zone->start;
+ return 0;
+ }
inode->i_mode &= ~0222;
- /* fallthrough */
+ return i_size_read(inode);
default:
if (zi->i_ztype == ZONEFS_ZTYPE_CNV)
return zi->i_max_size;
@@ -231,7 +245,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* as there is no inconsistency between the inode size and the amount of
* data writen in the zone (data_size).
*/
- data_size = zonefs_check_zone_condition(inode, zone, true);
+ data_size = zonefs_check_zone_condition(inode, zone, true, false);
isize = i_size_read(inode);
if (zone->cond != BLK_ZONE_COND_OFFLINE &&
zone->cond != BLK_ZONE_COND_READONLY &&
@@ -274,7 +288,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
if (zone->cond != BLK_ZONE_COND_OFFLINE) {
zone->cond = BLK_ZONE_COND_OFFLINE;
data_size = zonefs_check_zone_condition(inode, zone,
- false);
+ false, false);
}
} else if (zone->cond == BLK_ZONE_COND_READONLY ||
sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) {
@@ -283,7 +297,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
if (zone->cond != BLK_ZONE_COND_READONLY) {
zone->cond = BLK_ZONE_COND_READONLY;
data_size = zonefs_check_zone_condition(inode, zone,
- false);
+ false, false);
}
}
@@ -975,7 +989,7 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
zi->i_zsector = zone->start;
zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
zone->len << SECTOR_SHIFT);
- zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true);
+ zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true);
inode->i_uid = sbi->s_uid;
inode->i_gid = sbi->s_gid;