diff options
Diffstat (limited to 'fs/fuse')
-rw-r--r-- | fs/fuse/Kconfig | 2 | ||||
-rw-r--r-- | fs/fuse/Makefile | 5 | ||||
-rw-r--r-- | fs/fuse/backing.c | 179 | ||||
-rw-r--r-- | fs/fuse/cuse.c | 3 | ||||
-rw-r--r-- | fs/fuse/dev.c | 231 | ||||
-rw-r--r-- | fs/fuse/dev_uring.c | 16 | ||||
-rw-r--r-- | fs/fuse/dir.c | 21 | ||||
-rw-r--r-- | fs/fuse/file.c | 86 | ||||
-rw-r--r-- | fs/fuse/fuse_dev_i.h | 13 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 70 | ||||
-rw-r--r-- | fs/fuse/inode.c | 80 | ||||
-rw-r--r-- | fs/fuse/iomode.c | 3 | ||||
-rw-r--r-- | fs/fuse/passthrough.c | 167 | ||||
-rw-r--r-- | fs/fuse/trace.c | 13 | ||||
-rw-r--r-- | fs/fuse/virtio_fs.c | 12 |
15 files changed, 534 insertions, 367 deletions
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index a774166264de..3a4ae632c94a 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -13,7 +13,7 @@ config FUSE_FS although chances are your distribution already has that library installed if you've installed the "fuse" package itself. - See <file:Documentation/filesystems/fuse.rst> for more information. + See <file:Documentation/filesystems/fuse/fuse.rst> for more information. See <file:Documentation/Changes> for needed library/utility version. If you want to develop a userspace FS, or if you want to use diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 3f0f312a31c1..22ad9538dfc4 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -10,10 +10,11 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o obj-$(CONFIG_VIRTIO_FS) += virtiofs.o -fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o +fuse-y := trace.o # put trace.o first so we see ftrace errors sooner +fuse-y += dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o fuse-y += iomode.o fuse-$(CONFIG_FUSE_DAX) += dax.o -fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o +fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o backing.o fuse-$(CONFIG_SYSCTL) += sysctl.o fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c new file mode 100644 index 000000000000..4afda419dd14 --- /dev/null +++ b/fs/fuse/backing.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * FUSE passthrough to backing file. + * + * Copyright (c) 2023 CTERA Networks. + */ + +#include "fuse_i.h" + +#include <linux/file.h> + +struct fuse_backing *fuse_backing_get(struct fuse_backing *fb) +{ + if (fb && refcount_inc_not_zero(&fb->count)) + return fb; + return NULL; +} + +static void fuse_backing_free(struct fuse_backing *fb) +{ + pr_debug("%s: fb=0x%p\n", __func__, fb); + + if (fb->file) + fput(fb->file); + put_cred(fb->cred); + kfree_rcu(fb, rcu); +} + +void fuse_backing_put(struct fuse_backing *fb) +{ + if (fb && refcount_dec_and_test(&fb->count)) + fuse_backing_free(fb); +} + +void fuse_backing_files_init(struct fuse_conn *fc) +{ + idr_init(&fc->backing_files_map); +} + +static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb) +{ + int id; + + idr_preload(GFP_KERNEL); + spin_lock(&fc->lock); + /* FIXME: xarray might be space inefficient */ + id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC); + spin_unlock(&fc->lock); + idr_preload_end(); + + WARN_ON_ONCE(id == 0); + return id; +} + +static struct fuse_backing *fuse_backing_id_remove(struct fuse_conn *fc, + int id) +{ + struct fuse_backing *fb; + + spin_lock(&fc->lock); + fb = idr_remove(&fc->backing_files_map, id); + spin_unlock(&fc->lock); + + return fb; +} + +static int fuse_backing_id_free(int id, void *p, void *data) +{ + struct fuse_backing *fb = p; + + WARN_ON_ONCE(refcount_read(&fb->count) != 1); + fuse_backing_free(fb); + return 0; +} + +void fuse_backing_files_free(struct fuse_conn *fc) +{ + idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL); + idr_destroy(&fc->backing_files_map); +} + +int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) +{ + struct file *file; + struct super_block *backing_sb; + struct fuse_backing *fb = NULL; + int res; + + pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags); + + /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */ + res = -EPERM; + if (!fc->passthrough || !capable(CAP_SYS_ADMIN)) + goto out; + + res = -EINVAL; + if (map->flags || map->padding) + goto out; + + file = fget_raw(map->fd); + res = -EBADF; + if (!file) + goto out; + + /* read/write/splice/mmap passthrough only relevant for regular files */ + res = d_is_dir(file->f_path.dentry) ? -EISDIR : -EINVAL; + if (!d_is_reg(file->f_path.dentry)) + goto out_fput; + + backing_sb = file_inode(file)->i_sb; + res = -ELOOP; + if (backing_sb->s_stack_depth >= fc->max_stack_depth) + goto out_fput; + + fb = kmalloc(sizeof(struct fuse_backing), GFP_KERNEL); + res = -ENOMEM; + if (!fb) + goto out_fput; + + fb->file = file; + fb->cred = prepare_creds(); + refcount_set(&fb->count, 1); + + res = fuse_backing_id_alloc(fc, fb); + if (res < 0) { + fuse_backing_free(fb); + fb = NULL; + } + +out: + pr_debug("%s: fb=0x%p, ret=%i\n", __func__, fb, res); + + return res; + +out_fput: + fput(file); + goto out; +} + +int fuse_backing_close(struct fuse_conn *fc, int backing_id) +{ + struct fuse_backing *fb = NULL; + int err; + + pr_debug("%s: backing_id=%d\n", __func__, backing_id); + + /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */ + err = -EPERM; + if (!fc->passthrough || !capable(CAP_SYS_ADMIN)) + goto out; + + err = -EINVAL; + if (backing_id <= 0) + goto out; + + err = -ENOENT; + fb = fuse_backing_id_remove(fc, backing_id); + if (!fb) + goto out; + + fuse_backing_put(fb); + err = 0; +out: + pr_debug("%s: fb=0x%p, err=%i\n", __func__, fb, err); + + return err; +} + +struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id) +{ + struct fuse_backing *fb; + + rcu_read_lock(); + fb = idr_find(&fc->backing_files_map, backing_id); + fb = fuse_backing_get(fb); + rcu_read_unlock(); + + return fb; +} diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b39844d75a80..28c96961e85d 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -52,6 +52,7 @@ #include <linux/user_namespace.h> #include "fuse_i.h" +#include "fuse_dev_i.h" #define CUSE_CONNTBL_LEN 64 @@ -547,7 +548,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) */ static int cuse_channel_release(struct inode *inode, struct file *file) { - struct fuse_dev *fud = file->private_data; + struct fuse_dev *fud = __fuse_get_dev(file); struct cuse_conn *cc = fc_to_cc(fud->fc); /* remove from the conntbl, no more access from this point on */ diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 5150aa25e64b..132f38619d70 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -25,7 +25,6 @@ #include <linux/sched.h> #include <linux/seq_file.h> -#define CREATE_TRACE_POINTS #include "fuse_trace.h" MODULE_ALIAS_MISCDEV(FUSE_MINOR); @@ -119,7 +118,7 @@ void fuse_check_timeout(struct work_struct *work) goto abort_conn; out: - queue_delayed_work(system_wq, &fc->timeout.work, + queue_delayed_work(system_percpu_wq, &fc->timeout.work, fuse_timeout_timer_freq); return; @@ -207,8 +206,9 @@ static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap, if (fuse_block_alloc(fc, for_background)) { err = -EINTR; - if (wait_event_killable_exclusive(fc->blocked_waitq, - !fuse_block_alloc(fc, for_background))) + if (wait_event_state_exclusive(fc->blocked_waitq, + !fuse_block_alloc(fc, for_background), + (TASK_KILLABLE | TASK_FREEZABLE))) goto out; } /* Matches smp_wmb() in fuse_set_initialized() */ @@ -322,6 +322,7 @@ unsigned int fuse_req_hash(u64 unique) { return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); } +EXPORT_SYMBOL_GPL(fuse_req_hash); /* * A new request is available, wake fiq->waitq @@ -369,12 +370,32 @@ void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) } } +static inline void fuse_request_assign_unique_locked(struct fuse_iqueue *fiq, + struct fuse_req *req) +{ + if (req->in.h.opcode != FUSE_NOTIFY_REPLY) + req->in.h.unique = fuse_get_unique_locked(fiq); + + /* tracepoint captures in.h.unique and in.h.len */ + trace_fuse_request_send(req); +} + +inline void fuse_request_assign_unique(struct fuse_iqueue *fiq, + struct fuse_req *req) +{ + if (req->in.h.opcode != FUSE_NOTIFY_REPLY) + req->in.h.unique = fuse_get_unique(fiq); + + /* tracepoint captures in.h.unique and in.h.len */ + trace_fuse_request_send(req); +} +EXPORT_SYMBOL_GPL(fuse_request_assign_unique); + static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->lock); if (fiq->connected) { - if (req->in.h.opcode != FUSE_NOTIFY_REPLY) - req->in.h.unique = fuse_get_unique_locked(fiq); + fuse_request_assign_unique_locked(fiq, req); list_add_tail(&req->list, &fiq->pending); fuse_dev_wake_and_unlock(fiq); } else { @@ -397,7 +418,6 @@ static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req) req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, (struct fuse_arg *) req->args->in_args); - trace_fuse_request_send(req); fiq->ops->send_req(fiq, req); } @@ -687,10 +707,10 @@ static bool fuse_request_queue_background_uring(struct fuse_conn *fc, { struct fuse_iqueue *fiq = &fc->iq; - req->in.h.unique = fuse_get_unique(fiq); req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, (struct fuse_arg *) req->args->in_args); + fuse_request_assign_unique(fiq, req); return fuse_uring_queue_bq_req(req); } @@ -935,7 +955,7 @@ static int fuse_check_folio(struct folio *folio) { if (folio_mapped(folio) || folio->mapping != NULL || - (folio->flags & PAGE_FLAGS_CHECK_AT_PREP & + (folio->flags.f & PAGE_FLAGS_CHECK_AT_PREP & ~(1 << PG_locked | 1 << PG_referenced | 1 << PG_lru | @@ -1528,14 +1548,34 @@ static int fuse_dev_open(struct inode *inode, struct file *file) return 0; } +struct fuse_dev *fuse_get_dev(struct file *file) +{ + struct fuse_dev *fud = __fuse_get_dev(file); + int err; + + if (likely(fud)) + return fud; + + err = wait_event_interruptible(fuse_dev_waitq, + READ_ONCE(file->private_data) != FUSE_DEV_SYNC_INIT); + if (err) + return ERR_PTR(err); + + fud = __fuse_get_dev(file); + if (!fud) + return ERR_PTR(-EPERM); + + return fud; +} + static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) { struct fuse_copy_state cs; struct file *file = iocb->ki_filp; struct fuse_dev *fud = fuse_get_dev(file); - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); if (!user_backed_iter(to)) return -EINVAL; @@ -1555,8 +1595,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, struct fuse_copy_state cs; struct fuse_dev *fud = fuse_get_dev(in); - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer), GFP_KERNEL); @@ -1600,35 +1640,31 @@ static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_poll_wakeup_out outarg; - int err = -EINVAL; + int err; if (size != sizeof(outarg)) - goto err; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto err; + return err; fuse_copy_finish(cs); return fuse_notify_poll_wakeup(fc, &outarg); - -err: - fuse_copy_finish(cs); - return err; } static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, struct fuse_copy_state *cs) { struct fuse_notify_inval_inode_out outarg; - int err = -EINVAL; + int err; if (size != sizeof(outarg)) - goto err; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto err; + return err; fuse_copy_finish(cs); down_read(&fc->killsb); @@ -1636,10 +1672,6 @@ static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, outarg.off, outarg.len); up_read(&fc->killsb); return err; - -err: - fuse_copy_finish(cs); - return err; } static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, @@ -1647,29 +1679,26 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, { struct fuse_notify_inval_entry_out outarg; int err; - char *buf = NULL; + char *buf; struct qstr name; - err = -EINVAL; if (size < sizeof(outarg)) - goto err; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto err; + return err; - err = -ENAMETOOLONG; if (outarg.namelen > fc->name_max) - goto err; + return -ENAMETOOLONG; err = -EINVAL; if (size != sizeof(outarg) + outarg.namelen + 1) - goto err; + return -EINVAL; - err = -ENOMEM; buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); if (!buf) - goto err; + return -ENOMEM; name.name = buf; name.len = outarg.namelen; @@ -1682,12 +1711,8 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, down_read(&fc->killsb); err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags); up_read(&fc->killsb); - kfree(buf); - return err; - err: kfree(buf); - fuse_copy_finish(cs); return err; } @@ -1696,29 +1721,25 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, { struct fuse_notify_delete_out outarg; int err; - char *buf = NULL; + char *buf; struct qstr name; - err = -EINVAL; if (size < sizeof(outarg)) - goto err; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto err; + return err; - err = -ENAMETOOLONG; if (outarg.namelen > fc->name_max) - goto err; + return -ENAMETOOLONG; - err = -EINVAL; if (size != sizeof(outarg) + outarg.namelen + 1) - goto err; + return -EINVAL; - err = -ENOMEM; buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); if (!buf) - goto err; + return -ENOMEM; name.name = buf; name.len = outarg.namelen; @@ -1731,12 +1752,8 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, down_read(&fc->killsb); err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0); up_read(&fc->killsb); - kfree(buf); - return err; - err: kfree(buf); - fuse_copy_finish(cs); return err; } @@ -1754,17 +1771,15 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, loff_t file_size; loff_t end; - err = -EINVAL; if (size < sizeof(outarg)) - goto out_finish; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto out_finish; + return err; - err = -EINVAL; if (size - sizeof(outarg) != outarg.size) - goto out_finish; + return -EINVAL; nodeid = outarg.nodeid; @@ -1824,8 +1839,6 @@ out_iput: iput(inode); out_up_killsb: up_read(&fc->killsb); -out_finish: - fuse_copy_finish(cs); return err; } @@ -1940,13 +1953,12 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, u64 nodeid; int err; - err = -EINVAL; if (size != sizeof(outarg)) - goto copy_finish; + return -EINVAL; err = fuse_copy_one(cs, &outarg, sizeof(outarg)); if (err) - goto copy_finish; + return err; fuse_copy_finish(cs); @@ -1962,10 +1974,6 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, up_read(&fc->killsb); return err; - -copy_finish: - fuse_copy_finish(cs); - return err; } /* @@ -2044,6 +2052,42 @@ static int fuse_notify_inc_epoch(struct fuse_conn *fc) return 0; } +static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_prune_out outarg; + const unsigned int batch = 512; + u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL); + unsigned int num, i; + int err; + + if (!nodeids) + return -ENOMEM; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (size - sizeof(outarg) != outarg.count * sizeof(u64)) + return -EINVAL; + + for (; outarg.count; outarg.count -= num) { + num = min(batch, outarg.count); + err = fuse_copy_one(cs, nodeids, num * sizeof(u64)); + if (err) + return err; + + scoped_guard(rwsem_read, &fc->killsb) { + for (i = 0; i < num; i++) + fuse_try_prune_one_inode(fc, nodeids[i]); + } + } + return 0; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -2075,8 +2119,10 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_INC_EPOCH: return fuse_notify_inc_epoch(fc); + case FUSE_NOTIFY_PRUNE: + return fuse_notify_prune(fc, size, cs); + default: - fuse_copy_finish(cs); return -EINVAL; } } @@ -2156,7 +2202,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, */ if (!oh.unique) { err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs); - goto out; + goto copy_finish; } err = -EINVAL; @@ -2229,7 +2275,7 @@ copy_finish: static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) { struct fuse_copy_state cs; - struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp); + struct fuse_dev *fud = __fuse_get_dev(iocb->ki_filp); if (!fud) return -EPERM; @@ -2251,11 +2297,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, unsigned idx; struct pipe_buffer *bufs; struct fuse_copy_state cs; - struct fuse_dev *fud; + struct fuse_dev *fud = __fuse_get_dev(out); size_t rem; ssize_t ret; - fud = fuse_get_dev(out); if (!fud) return -EPERM; @@ -2341,7 +2386,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) struct fuse_iqueue *fiq; struct fuse_dev *fud = fuse_get_dev(file); - if (!fud) + if (IS_ERR(fud)) return EPOLLERR; fiq = &fud->fc->iq; @@ -2394,7 +2439,7 @@ static void end_polls(struct fuse_conn *fc) * The same effect is usually achievable through killing the filesystem daemon * and all users of the filesystem. The exception is the combination of an * asynchronous request and the tricky deadlock (see - * Documentation/filesystems/fuse.rst). + * Documentation/filesystems/fuse/fuse.rst). * * Aborting requests under I/O goes as follows: 1: Separate out unlocked * requests, they should be finished off immediately. Locked requests will be @@ -2488,7 +2533,7 @@ void fuse_wait_aborted(struct fuse_conn *fc) int fuse_dev_release(struct inode *inode, struct file *file) { - struct fuse_dev *fud = fuse_get_dev(file); + struct fuse_dev *fud = __fuse_get_dev(file); if (fud) { struct fuse_conn *fc = fud->fc; @@ -2519,8 +2564,8 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) { struct fuse_dev *fud = fuse_get_dev(file); - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); /* No locking - fasync_helper does its own locking */ return fasync_helper(fd, file, on, &fud->fc->iq.fasync); @@ -2530,7 +2575,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) { struct fuse_dev *fud; - if (new->private_data) + if (__fuse_get_dev(new)) return -EINVAL; fud = fuse_dev_alloc_install(fc); @@ -2561,7 +2606,7 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) * uses the same ioctl handler. */ if (fd_file(f)->f_op == file->f_op) - fud = fuse_get_dev(fd_file(f)); + fud = __fuse_get_dev(fd_file(f)); res = -EINVAL; if (fud) { @@ -2579,8 +2624,8 @@ static long fuse_dev_ioctl_backing_open(struct file *file, struct fuse_dev *fud = fuse_get_dev(file); struct fuse_backing_map map; - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) return -EOPNOTSUPP; @@ -2596,8 +2641,8 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) struct fuse_dev *fud = fuse_get_dev(file); int backing_id; - if (!fud) - return -EPERM; + if (IS_ERR(fud)) + return PTR_ERR(fud); if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) return -EOPNOTSUPP; @@ -2608,6 +2653,19 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) return fuse_backing_close(fud->fc, backing_id); } +static long fuse_dev_ioctl_sync_init(struct file *file) +{ + int err = -EINVAL; + + mutex_lock(&fuse_mutex); + if (!__fuse_get_dev(file)) { + WRITE_ONCE(file->private_data, FUSE_DEV_SYNC_INIT); + err = 0; + } + mutex_unlock(&fuse_mutex); + return err; +} + static long fuse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2623,6 +2681,9 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, case FUSE_DEV_IOC_BACKING_CLOSE: return fuse_dev_ioctl_backing_close(file, argp); + case FUSE_DEV_IOC_SYNC_INIT: + return fuse_dev_ioctl_sync_init(file); + default: return -ENOTTY; } @@ -2631,7 +2692,7 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, #ifdef CONFIG_PROC_FS static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file) { - struct fuse_dev *fud = fuse_get_dev(file); + struct fuse_dev *fud = __fuse_get_dev(file); if (!fud) return; diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 249b210becb1..f6b12aebb8bb 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -7,6 +7,7 @@ #include "fuse_i.h" #include "dev_uring_i.h" #include "fuse_dev_i.h" +#include "fuse_trace.h" #include <linux/fs.h> #include <linux/io_uring/cmd.h> @@ -351,7 +352,7 @@ static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent) spin_unlock(&queue->lock); if (cmd) - io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED); + io_uring_cmd_done(cmd, -ENOTCONN, IO_URING_F_UNLOCKED); if (req) fuse_uring_stop_fuse_req_end(req); @@ -518,7 +519,7 @@ static void fuse_uring_cancel(struct io_uring_cmd *cmd, if (need_cmd_done) { /* no queue lock to avoid lock order issues */ - io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags); + io_uring_cmd_done(cmd, -ENOTCONN, issue_flags); } } @@ -733,7 +734,7 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent, list_move_tail(&ent->list, &queue->ent_in_userspace); spin_unlock(&queue->lock); - io_uring_cmd_done(cmd, 0, 0, issue_flags); + io_uring_cmd_done(cmd, 0, issue_flags); return 0; } @@ -1139,9 +1140,9 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) return -EINVAL; fud = fuse_get_dev(cmd->file); - if (!fud) { + if (IS_ERR(fud)) { pr_info_ratelimited("No fuse device found\n"); - return -ENOTCONN; + return PTR_ERR(fud); } fc = fud->fc; @@ -1200,7 +1201,7 @@ static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd, ent->cmd = NULL; spin_unlock(&queue->lock); - io_uring_cmd_done(cmd, ret, 0, issue_flags); + io_uring_cmd_done(cmd, ret, issue_flags); } /* @@ -1268,8 +1269,7 @@ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req) if (!queue) goto err; - if (req->in.h.opcode != FUSE_NOTIFY_REPLY) - req->in.h.unique = fuse_get_unique(fiq); + fuse_request_assign_unique(fiq, req); spin_lock(&queue->lock); err = -ENOTCONN; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5c569c3cb53f..ecaec0fea3a1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -739,22 +739,18 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, int err; struct mnt_idmap *idmap = file_mnt_idmap(file); struct fuse_conn *fc = get_fuse_conn(dir); - struct dentry *res = NULL; if (fuse_is_bad(dir)) return -EIO; if (d_in_lookup(entry)) { - res = fuse_lookup(dir, entry, 0); - if (IS_ERR(res)) - return PTR_ERR(res); - - if (res) - entry = res; + struct dentry *res = fuse_lookup(dir, entry, 0); + if (res || d_really_is_positive(entry)) + return finish_no_open(file, res); } - if (!(flags & O_CREAT) || d_really_is_positive(entry)) - goto no_open; + if (!(flags & O_CREAT)) + return finish_no_open(file, NULL); /* Only creates */ file->f_mode |= FMODE_CREATED; @@ -768,16 +764,13 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, goto mknod; } else if (err == -EEXIST) fuse_invalidate_entry(entry); -out_dput: - dput(res); return err; mknod: err = fuse_mknod(idmap, dir, entry, mode, 0); if (err) - goto out_dput; -no_open: - return finish_no_open(file, res); + return err; + return finish_no_open(file, NULL); } /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4adcf09d4b01..f1ef77a0be05 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -356,8 +356,14 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff, * Make the release synchronous if this is a fuseblk mount, * synchronous RELEASE is allowed (and desirable) in this case * because the server can be trusted not to screw up. + * + * Always use the asynchronous file put because the current thread + * might be the fuse server. This can happen if a process starts some + * aio and closes the fd before the aio completes. Since aio takes its + * own ref to the file, the IO completion has to drop the ref, which is + * how the fuse server can end up closing its clients' files. */ - fuse_file_put(ff, ff->fm->fc->destroy); + fuse_file_put(ff, false); } void fuse_release_common(struct file *file, bool isdir) @@ -865,22 +871,20 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, struct fuse_args_pages *ap = &ia->ap; size_t count = ia->read.in.size; size_t num_read = args->out_args[0].size; - struct address_space *mapping = NULL; - - for (i = 0; mapping == NULL && i < ap->num_folios; i++) - mapping = ap->folios[i]->mapping; + struct address_space *mapping; + struct inode *inode; - if (mapping) { - struct inode *inode = mapping->host; + WARN_ON_ONCE(!ap->num_folios); + mapping = ap->folios[0]->mapping; + inode = mapping->host; - /* - * Short read means EOF. If file size is larger, truncate it - */ - if (!err && num_read < count) - fuse_short_read(inode, ia->read.attr_ver, num_read, ap); + /* + * Short read means EOF. If file size is larger, truncate it + */ + if (!err && num_read < count) + fuse_short_read(inode, ia->read.attr_ver, num_read, ap); - fuse_invalidate_atime(inode); - } + fuse_invalidate_atime(inode); for (i = 0; i < ap->num_folios; i++) { folio_end_read(ap->folios[i], !err); @@ -1175,7 +1179,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, num = min(iov_iter_count(ii), fc->max_write); ap->args.in_pages = true; - ap->descs[0].offset = offset; while (num && ap->num_folios < max_folios) { size_t tmp; @@ -1823,19 +1826,15 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa) struct fuse_args_pages *ap = &wpa->ia.ap; struct inode *inode = wpa->inode; struct fuse_inode *fi = get_fuse_inode(inode); - struct backing_dev_info *bdi = inode_to_bdi(inode); int i; - for (i = 0; i < ap->num_folios; i++) { + for (i = 0; i < ap->num_folios; i++) /* * Benchmarks showed that ending writeback within the * scope of the fi->lock alleviates xarray lock * contention and noticeably improves performance. */ iomap_finish_folio_write(inode, ap->folios[i], 1); - dec_wb_stat(&bdi->wb, WB_WRITEBACK); - wb_writeout_inc(&bdi->wb); - } wake_up(&fi->page_waitq); } @@ -2010,14 +2009,11 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc, static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio, uint32_t folio_index, loff_t offset, unsigned len) { - struct inode *inode = folio->mapping->host; struct fuse_args_pages *ap = &wpa->ia.ap; ap->folios[folio_index] = folio; ap->descs[folio_index].offset = offset; ap->descs[folio_index].length = len; - - inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); } static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio, @@ -2960,10 +2956,12 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, .nodeid_out = ff_out->nodeid, .fh_out = ff_out->fh, .off_out = pos_out, - .len = min_t(size_t, len, UINT_MAX & PAGE_MASK), + .len = len, .flags = flags }; struct fuse_write_out outarg; + struct fuse_copy_file_range_out outarg_64; + u64 bytes_copied; ssize_t err; /* mark unstable when write-back is not used, and file_out gets * extended */ @@ -3013,33 +3011,51 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (is_unstable) set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); - args.opcode = FUSE_COPY_FILE_RANGE; + args.opcode = FUSE_COPY_FILE_RANGE_64; args.nodeid = ff_in->nodeid; args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.out_numargs = 1; - args.out_args[0].size = sizeof(outarg); - args.out_args[0].value = &outarg; + args.out_args[0].size = sizeof(outarg_64); + args.out_args[0].value = &outarg_64; + if (fc->no_copy_file_range_64) { +fallback: + /* Fall back to old op that can't handle large copy length */ + args.opcode = FUSE_COPY_FILE_RANGE; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; + inarg.len = len = min_t(size_t, len, UINT_MAX & PAGE_MASK); + } err = fuse_simple_request(fm, &args); if (err == -ENOSYS) { - fc->no_copy_file_range = 1; - err = -EOPNOTSUPP; + if (fc->no_copy_file_range_64) { + fc->no_copy_file_range = 1; + err = -EOPNOTSUPP; + } else { + fc->no_copy_file_range_64 = 1; + goto fallback; + } } - if (!err && outarg.size > len) - err = -EIO; - if (err) goto out; + bytes_copied = fc->no_copy_file_range_64 ? + outarg.size : outarg_64.bytes_copied; + + if (bytes_copied > len) { + err = -EIO; + goto out; + } + truncate_inode_pages_range(inode_out->i_mapping, ALIGN_DOWN(pos_out, PAGE_SIZE), - ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); + ALIGN(pos_out + bytes_copied, PAGE_SIZE) - 1); file_update_time(file_out); - fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size); + fuse_write_update_attr(inode_out, pos_out + bytes_copied, bytes_copied); - err = outarg.size; + err = bytes_copied; out: if (is_unstable) clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h index 5a9bd771a319..6e8373f97040 100644 --- a/fs/fuse/fuse_dev_i.h +++ b/fs/fuse/fuse_dev_i.h @@ -12,6 +12,8 @@ #define FUSE_INT_REQ_BIT (1ULL << 0) #define FUSE_REQ_ID_STEP (1ULL << 1) +extern struct wait_queue_head fuse_dev_waitq; + struct fuse_arg; struct fuse_args; struct fuse_pqueue; @@ -37,15 +39,22 @@ struct fuse_copy_state { } ring; }; -static inline struct fuse_dev *fuse_get_dev(struct file *file) +#define FUSE_DEV_SYNC_INIT ((struct fuse_dev *) 1) +#define FUSE_DEV_PTR_MASK (~1UL) + +static inline struct fuse_dev *__fuse_get_dev(struct file *file) { /* * Lockless access is OK, because file->private data is set * once during mount and is valid until the file is released. */ - return READ_ONCE(file->private_data); + struct fuse_dev *fud = READ_ONCE(file->private_data); + + return (typeof(fud)) ((unsigned long) fud & FUSE_DEV_PTR_MASK); } +struct fuse_dev *fuse_get_dev(struct file *file); + unsigned int fuse_req_hash(u64 unique); struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index cc428d04be3e..c2f2a48156d6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -856,6 +856,9 @@ struct fuse_conn { /** Does the filesystem support copy_file_range? */ unsigned no_copy_file_range:1; + /** Does the filesystem support copy_file_range_64? */ + unsigned no_copy_file_range_64:1; + /* Send DESTROY request */ unsigned int destroy:1; @@ -901,6 +904,9 @@ struct fuse_conn { /* Is link not implemented by fs? */ unsigned int no_link:1; + /* Is synchronous FUSE_INIT allowed? */ + unsigned int sync_init:1; + /* Use io_uring for communication */ unsigned int io_uring; @@ -1255,6 +1261,11 @@ int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, gfp_t gfp_flags); /** + * Assign a unique id to a fuse request + */ +void fuse_request_assign_unique(struct fuse_iqueue *fiq, struct fuse_req *req); + +/** * End a finished request */ void fuse_request_end(struct fuse_req *req); @@ -1315,7 +1326,7 @@ struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); struct fuse_dev *fuse_dev_alloc(void); void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); void fuse_dev_free(struct fuse_dev *fud); -void fuse_send_init(struct fuse_mount *fm); +int fuse_send_init(struct fuse_mount *fm); /** * Fill in superblock and initialize fuse connection @@ -1407,6 +1418,12 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, u64 child_nodeid, struct qstr *name, u32 flags); +/* + * Try to prune this inode. If neither the inode itself nor dentries associated + * with this inode have any external reference, then the inode can be freed. + */ +void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid); + int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, bool isdir); @@ -1512,29 +1529,11 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, void fuse_file_release(struct inode *inode, struct fuse_file *ff, unsigned int open_flags, fl_owner_t id, bool isdir); -/* passthrough.c */ -static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi) -{ -#ifdef CONFIG_FUSE_PASSTHROUGH - return READ_ONCE(fi->fb); -#else - return NULL; -#endif -} - -static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi, - struct fuse_backing *fb) -{ -#ifdef CONFIG_FUSE_PASSTHROUGH - return xchg(&fi->fb, fb); -#else - return NULL; -#endif -} - +/* backing.c */ #ifdef CONFIG_FUSE_PASSTHROUGH struct fuse_backing *fuse_backing_get(struct fuse_backing *fb); void fuse_backing_put(struct fuse_backing *fb); +struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id); #else static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb) @@ -1545,6 +1544,11 @@ static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb) static inline void fuse_backing_put(struct fuse_backing *fb) { } +static inline struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, + int backing_id) +{ + return NULL; +} #endif void fuse_backing_files_init(struct fuse_conn *fc); @@ -1552,9 +1556,27 @@ void fuse_backing_files_free(struct fuse_conn *fc); int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); int fuse_backing_close(struct fuse_conn *fc, int backing_id); -struct fuse_backing *fuse_passthrough_open(struct file *file, - struct inode *inode, - int backing_id); +/* passthrough.c */ +static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi) +{ +#ifdef CONFIG_FUSE_PASSTHROUGH + return READ_ONCE(fi->fb); +#else + return NULL; +#endif +} + +static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi, + struct fuse_backing *fb) +{ +#ifdef CONFIG_FUSE_PASSTHROUGH + return xchg(&fi->fb, fb); +#else + return NULL; +#endif +} + +struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id); void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb); static inline struct file *fuse_file_passthrough(struct fuse_file *ff) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7ddfd2b3cc9c..d1babf56f254 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "fuse_dev_i.h" #include "dev_uring_i.h" #include <linux/dax.h> @@ -34,6 +35,7 @@ MODULE_LICENSE("GPL"); static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); +DECLARE_WAIT_QUEUE_HEAD(fuse_dev_waitq); static int set_global_limit(const char *val, const struct kernel_param *kp); @@ -101,14 +103,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) if (!fi) return NULL; - fi->i_time = 0; + /* Initialize private data (i.e. everything except fi->inode) */ + BUILD_BUG_ON(offsetof(struct fuse_inode, inode) != 0); + memset((void *) fi + sizeof(fi->inode), 0, sizeof(*fi) - sizeof(fi->inode)); + fi->inval_mask = ~0; - fi->nodeid = 0; - fi->nlookup = 0; - fi->attr_version = 0; - fi->orig_ino = 0; - fi->state = 0; - fi->submount_lookup = NULL; mutex_init(&fi->mutex); spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); @@ -586,6 +585,17 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, return 0; } +void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid) +{ + struct inode *inode; + + inode = fuse_ilookup(fc, nodeid, NULL); + if (!inode) + return; + d_prune_aliases(inode); + iput(inode); +} + bool fuse_lock_inode(struct inode *inode) { bool locked = false; @@ -1209,7 +1219,7 @@ static const struct super_operations fuse_super_operations = { .free_inode = fuse_free_inode, .evict_inode = fuse_evict_inode, .write_inode = fuse_write_inode, - .drop_inode = generic_delete_inode, + .drop_inode = inode_just_drop, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, .sync_fs = fuse_sync_fs, @@ -1273,7 +1283,7 @@ static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) { fc->timeout.req_timeout = secs_to_jiffies(timeout); INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout); - queue_delayed_work(system_wq, &fc->timeout.work, + queue_delayed_work(system_percpu_wq, &fc->timeout.work, fuse_timeout_timer_freq); } @@ -1469,7 +1479,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, wake_up_all(&fc->blocked_waitq); } -void fuse_send_init(struct fuse_mount *fm) +static struct fuse_init_args *fuse_new_init(struct fuse_mount *fm) { struct fuse_init_args *ia; u64 flags; @@ -1528,10 +1538,30 @@ void fuse_send_init(struct fuse_mount *fm) ia->args.out_args[0].value = &ia->out; ia->args.force = true; ia->args.nocreds = true; - ia->args.end = process_init_reply; - if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) - process_init_reply(fm, &ia->args, -ENOTCONN); + return ia; +} + +int fuse_send_init(struct fuse_mount *fm) +{ + struct fuse_init_args *ia = fuse_new_init(fm); + int err; + + if (fm->fc->sync_init) { + err = fuse_simple_request(fm, &ia->args); + /* Ignore size of init reply */ + if (err > 0) + err = 0; + } else { + ia->args.end = process_init_reply; + err = fuse_simple_background(fm, &ia->args, GFP_KERNEL); + if (!err) + return 0; + } + process_init_reply(fm, &ia->args, err); + if (fm->fc->conn_error) + return -ENOTCONN; + return 0; } EXPORT_SYMBOL_GPL(fuse_send_init); @@ -1561,8 +1591,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) if (err) return err; - /* fuse does it's own writeback accounting */ - sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT; sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; /* @@ -1821,6 +1849,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) !sb_set_blocksize(sb, PAGE_SIZE)) goto err; #endif + fc->sync_fs = 1; } else { sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -1872,8 +1901,12 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) mutex_lock(&fuse_mutex); err = -EINVAL; - if (ctx->fudptr && *ctx->fudptr) - goto err_unlock; + if (ctx->fudptr && *ctx->fudptr) { + if (*ctx->fudptr == FUSE_DEV_SYNC_INIT) + fc->sync_init = 1; + else + goto err_unlock; + } err = fuse_ctl_add_conn(fc); if (err) @@ -1881,8 +1914,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - if (ctx->fudptr) + if (ctx->fudptr) { *ctx->fudptr = fud; + wake_up_all(&fuse_dev_waitq); + } mutex_unlock(&fuse_mutex); return 0; @@ -1903,6 +1938,7 @@ EXPORT_SYMBOL_GPL(fuse_fill_super_common); static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; + struct fuse_mount *fm; int err; if (!ctx->file || !ctx->rootmode_present || @@ -1923,8 +1959,10 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) return err; /* file->private_data shall be visible on all CPUs after this */ smp_mb(); - fuse_send_init(get_fuse_mount_super(sb)); - return 0; + + fm = get_fuse_mount_super(sb); + + return fuse_send_init(fm); } /* @@ -1985,7 +2023,7 @@ static int fuse_get_tree(struct fs_context *fsc) * Allow creating a fuse mount with an already initialized fuse * connection */ - fud = READ_ONCE(ctx->file->private_data); + fud = __fuse_get_dev(ctx->file); if (ctx->file->f_op == &fuse_dev_operations && fud) { fsc->sget_key = fud->fc; sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super); diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c index c99e285f3183..3728933188f3 100644 --- a/fs/fuse/iomode.c +++ b/fs/fuse/iomode.c @@ -177,8 +177,7 @@ static int fuse_file_passthrough_open(struct inode *inode, struct file *file) (ff->open_flags & ~FOPEN_PASSTHROUGH_MASK)) return -EINVAL; - fb = fuse_passthrough_open(file, inode, - ff->args->open_outarg.backing_id); + fb = fuse_passthrough_open(file, ff->args->open_outarg.backing_id); if (IS_ERR(fb)) return PTR_ERR(fb); diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index eb97ac009e75..72de97c03d0e 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -144,171 +144,12 @@ ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma) return backing_file_mmap(backing_file, vma, &ctx); } -struct fuse_backing *fuse_backing_get(struct fuse_backing *fb) -{ - if (fb && refcount_inc_not_zero(&fb->count)) - return fb; - return NULL; -} - -static void fuse_backing_free(struct fuse_backing *fb) -{ - pr_debug("%s: fb=0x%p\n", __func__, fb); - - if (fb->file) - fput(fb->file); - put_cred(fb->cred); - kfree_rcu(fb, rcu); -} - -void fuse_backing_put(struct fuse_backing *fb) -{ - if (fb && refcount_dec_and_test(&fb->count)) - fuse_backing_free(fb); -} - -void fuse_backing_files_init(struct fuse_conn *fc) -{ - idr_init(&fc->backing_files_map); -} - -static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb) -{ - int id; - - idr_preload(GFP_KERNEL); - spin_lock(&fc->lock); - /* FIXME: xarray might be space inefficient */ - id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC); - spin_unlock(&fc->lock); - idr_preload_end(); - - WARN_ON_ONCE(id == 0); - return id; -} - -static struct fuse_backing *fuse_backing_id_remove(struct fuse_conn *fc, - int id) -{ - struct fuse_backing *fb; - - spin_lock(&fc->lock); - fb = idr_remove(&fc->backing_files_map, id); - spin_unlock(&fc->lock); - - return fb; -} - -static int fuse_backing_id_free(int id, void *p, void *data) -{ - struct fuse_backing *fb = p; - - WARN_ON_ONCE(refcount_read(&fb->count) != 1); - fuse_backing_free(fb); - return 0; -} - -void fuse_backing_files_free(struct fuse_conn *fc) -{ - idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL); - idr_destroy(&fc->backing_files_map); -} - -int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) -{ - struct file *file; - struct super_block *backing_sb; - struct fuse_backing *fb = NULL; - int res; - - pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags); - - /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */ - res = -EPERM; - if (!fc->passthrough || !capable(CAP_SYS_ADMIN)) - goto out; - - res = -EINVAL; - if (map->flags || map->padding) - goto out; - - file = fget_raw(map->fd); - res = -EBADF; - if (!file) - goto out; - - /* read/write/splice/mmap passthrough only relevant for regular files */ - res = d_is_dir(file->f_path.dentry) ? -EISDIR : -EINVAL; - if (!d_is_reg(file->f_path.dentry)) - goto out_fput; - - backing_sb = file_inode(file)->i_sb; - res = -ELOOP; - if (backing_sb->s_stack_depth >= fc->max_stack_depth) - goto out_fput; - - fb = kmalloc(sizeof(struct fuse_backing), GFP_KERNEL); - res = -ENOMEM; - if (!fb) - goto out_fput; - - fb->file = file; - fb->cred = prepare_creds(); - refcount_set(&fb->count, 1); - - res = fuse_backing_id_alloc(fc, fb); - if (res < 0) { - fuse_backing_free(fb); - fb = NULL; - } - -out: - pr_debug("%s: fb=0x%p, ret=%i\n", __func__, fb, res); - - return res; - -out_fput: - fput(file); - goto out; -} - -int fuse_backing_close(struct fuse_conn *fc, int backing_id) -{ - struct fuse_backing *fb = NULL; - int err; - - pr_debug("%s: backing_id=%d\n", __func__, backing_id); - - /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */ - err = -EPERM; - if (!fc->passthrough || !capable(CAP_SYS_ADMIN)) - goto out; - - err = -EINVAL; - if (backing_id <= 0) - goto out; - - err = -ENOENT; - fb = fuse_backing_id_remove(fc, backing_id); - if (!fb) - goto out; - - fuse_backing_put(fb); - err = 0; -out: - pr_debug("%s: fb=0x%p, err=%i\n", __func__, fb, err); - - return err; -} - /* * Setup passthrough to a backing file. * * Returns an fb object with elevated refcount to be stored in fuse inode. */ -struct fuse_backing *fuse_passthrough_open(struct file *file, - struct inode *inode, - int backing_id) +struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fm->fc; @@ -320,12 +161,8 @@ struct fuse_backing *fuse_passthrough_open(struct file *file, if (backing_id <= 0) goto out; - rcu_read_lock(); - fb = idr_find(&fc->backing_files_map, backing_id); - fb = fuse_backing_get(fb); - rcu_read_unlock(); - err = -ENOENT; + fb = fuse_backing_lookup(fc, backing_id); if (!fb) goto out; diff --git a/fs/fuse/trace.c b/fs/fuse/trace.c new file mode 100644 index 000000000000..93bd72efc98c --- /dev/null +++ b/fs/fuse/trace.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "dev_uring_i.h" +#include "fuse_i.h" +#include "fuse_dev_i.h" + +#include <linux/pagemap.h> + +#define CREATE_TRACE_POINTS +#include "fuse_trace.h" diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 76c8fd0bfc75..6bc7c97b017d 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -20,6 +20,7 @@ #include <linux/cleanup.h> #include <linux/uio.h> #include "fuse_i.h" +#include "fuse_dev_i.h" /* Used to help calculate the FUSE connection's max_pages limit for a request's * size. Parts of the struct fuse_req are sliced into scattergather lists in @@ -761,7 +762,6 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) static void virtio_fs_request_complete(struct fuse_req *req, struct virtio_fs_vq *fsvq) { - struct fuse_pqueue *fpq = &fsvq->fud->pq; struct fuse_args *args; struct fuse_args_pages *ap; unsigned int len, i, thislen; @@ -790,9 +790,7 @@ static void virtio_fs_request_complete(struct fuse_req *req, } } - spin_lock(&fpq->lock); clear_bit(FR_SENT, &req->flags); - spin_unlock(&fpq->lock); fuse_request_end(req); spin_lock(&fsvq->lock); @@ -1384,7 +1382,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, unsigned int out_sgs = 0; unsigned int in_sgs = 0; unsigned int total_sgs; - unsigned int i; + unsigned int i, hash; int ret; bool notify; struct fuse_pqueue *fpq; @@ -1444,8 +1442,9 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, /* Request successfully sent. */ fpq = &fsvq->fud->pq; + hash = fuse_req_hash(req->in.h.unique); spin_lock(&fpq->lock); - list_add_tail(&req->list, fpq->processing); + list_add_tail(&req->list, &fpq->processing[hash]); spin_unlock(&fpq->lock); set_bit(FR_SENT, &req->flags); /* matches barrier in request_wait_answer() */ @@ -1480,8 +1479,7 @@ static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req) struct virtio_fs_vq *fsvq; int ret; - if (req->in.h.opcode != FUSE_NOTIFY_REPLY) - req->in.h.unique = fuse_get_unique(fiq); + fuse_request_assign_unique(fiq, req); clear_bit(FR_PENDING, &req->flags); |