summary | refs | log | tree | commit | diff
path: root/fs/fuse/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fuse/dev.c')
-rw-r--r-- fs/fuse/dev.c 598
1 files changed, 403 insertions, 195 deletions
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 3ec8bb5e68ff..6dcbaa218b7a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -6,7 +6,9 @@
See the file COPYING.
*/
+#include "dev_uring_i.h"
#include "fuse_i.h"
+#include "fuse_dev_i.h"
#include <linux/init.h>
#include <linux/module.h>
@@ -22,22 +24,106 @@
#include <linux/splice.h>
#include <linux/sched.h>
+#define CREATE_TRACE_POINTS
+#include "fuse_trace.h"
+
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
-/* Ordinary requests have even IDs, while interrupts IDs are odd */
-#define FUSE_INT_REQ_BIT (1ULL << 0)
-#define FUSE_REQ_ID_STEP (1ULL << 1)
-
static struct kmem_cache *fuse_req_cachep;
-static struct fuse_dev *fuse_get_dev(struct file *file)
+const unsigned long fuse_timeout_timer_freq =
+ secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ);
+
+bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list)
{
- /*
- * Lockless access is OK, because file->private data is set
- * once during mount and is valid until the file is released.
- */
- return READ_ONCE(file->private_data);
+ struct fuse_req *req;
+
+ req = list_first_entry_or_null(list, struct fuse_req, list);
+ if (!req)
+ return false;
+ return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
+}
+
+bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
+{ /* Returns true if the head request of any @processing bucket has expired. */
+ int i; /* bucket index, 0 .. FUSE_PQ_HASH_SIZE - 1 */
+
+ for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
+ if (fuse_request_expired(fc, &processing[i])) /* looks at the list head only */
+ return true;
+
+ return false; /* no bucket has an expired head request */
+}
+
+/*
+ * Check if any requests aren't being completed by the time the request timeout
+ * elapses. To do so, we:
+ * - check the fiq pending list
+ * - check the bg queue
+ * - check the fpq io and processing lists
+ * - ask fuse_uring_request_expired() about the io-uring side
+ *
+ * To make this fast, we only check against the head request on each list since
+ * these are generally queued in order of creation time (e.g. newer requests get
+ * queued to the tail). We might miss a few edge cases (e.g. requests transitioning
+ * between lists, re-sent requests at the head of the pending list having a
+ * later creation time than other requests on that list, etc.) but that is fine
+ * since if the request never gets fulfilled, it will eventually be caught.
+ *
+ * @work is the work item embedded in fc->timeout.work. When nothing has
+ * expired (or no request is waiting), the work is queued again on system_wq
+ * with a delay of fuse_timeout_timer_freq. When an expired request is found,
+ * fuse_abort_conn() is called and this function does not queue the work
+ * again. If fc->connected is already clear, it returns without re-queuing.
+ */
+void fuse_check_timeout(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
+ timeout.work);
+ struct fuse_iqueue *fiq = &fc->iq;
+ struct fuse_dev *fud;
+ struct fuse_pqueue *fpq;
+ bool expired = false;
+
+ if (!atomic_read(&fc->num_waiting)) /* no requests waiting: skip the scan */
+ goto out;
+
+ spin_lock(&fiq->lock);
+ expired = fuse_request_expired(fc, &fiq->pending);
+ spin_unlock(&fiq->lock);
+ if (expired)
+ goto abort_conn;
+
+ spin_lock(&fc->bg_lock);
+ expired = fuse_request_expired(fc, &fc->bg_queue);
+ spin_unlock(&fc->bg_lock);
+ if (expired)
+ goto abort_conn;
+
+ spin_lock(&fc->lock);
+ if (!fc->connected) { /* connection already down: stop without re-arming */
+ spin_unlock(&fc->lock);
+ return;
+ }
+ list_for_each_entry(fud, &fc->devices, entry) { /* each device's io and processing lists */
+ fpq = &fud->pq;
+ spin_lock(&fpq->lock);
+ if (fuse_request_expired(fc, &fpq->io) ||
+ fuse_fpq_processing_expired(fc, fpq->processing)) {
+ spin_unlock(&fpq->lock); /* drop both locks before aborting */
+ spin_unlock(&fc->lock);
+ goto abort_conn;
+ }
+
+ spin_unlock(&fpq->lock);
+ }
+ spin_unlock(&fc->lock);
+
+ if (fuse_uring_request_expired(fc))
+ goto abort_conn;
+
+out: /* nothing expired: re-arm the periodic check */
+ queue_delayed_work(system_wq, &fc->timeout.work,
+ fuse_timeout_timer_freq);
+ return;
+
+abort_conn: /* an expired request was found */
+ fuse_abort_conn(fc);
}
static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
@@ -48,6 +134,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
refcount_set(&req->count, 1);
__set_bit(FR_PENDING, &req->flags);
req->fm = fm;
+ req->create_time = jiffies;
}
static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
@@ -84,7 +171,8 @@ void fuse_set_initialized(struct fuse_conn *fc)
static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
{
- return !fc->initialized || (for_background && fc->blocked);
+ return !fc->initialized || (for_background && fc->blocked) ||
+ (fc->io_uring && fc->connected && !fuse_uring_ready(fc));
}
static void fuse_drop_waiting(struct fuse_conn *fc)
@@ -103,11 +191,17 @@ static void fuse_drop_waiting(struct fuse_conn *fc)
static void fuse_put_request(struct fuse_req *req);
-static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
+static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap,
+ struct fuse_mount *fm,
+ bool for_background)
{
struct fuse_conn *fc = fm->fc;
struct fuse_req *req;
+ bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP);
+ kuid_t fsuid;
+ kgid_t fsgid;
int err;
+
atomic_inc(&fc->num_waiting);
if (fuse_block_alloc(fc, for_background)) {
@@ -135,19 +229,32 @@ static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
goto out;
}
- req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
- req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
__set_bit(FR_WAITING, &req->flags);
if (for_background)
__set_bit(FR_BACKGROUND, &req->flags);
- if (unlikely(req->in.h.uid == ((uid_t)-1) ||
- req->in.h.gid == ((gid_t)-1))) {
+ /*
+ * Keep the old behavior when idmappings support was not
+ * declared by a FUSE server.
+ *
+ * For those FUSE servers who support idmapped mounts,
+ * we send UID/GID only along with "inode creation"
+ * fuse requests, otherwise idmap == &invalid_mnt_idmap and
+ * req->in.h.{u,g}id will be equal to FUSE_INVALID_UIDGID.
+ */
+ fsuid = no_idmap ? current_fsuid() : mapped_fsuid(idmap, fc->user_ns);
+ fsgid = no_idmap ? current_fsgid() : mapped_fsgid(idmap, fc->user_ns);
+ req->in.h.uid = from_kuid(fc->user_ns, fsuid);
+ req->in.h.gid = from_kgid(fc->user_ns, fsgid);
+
+ if (no_idmap && unlikely(req->in.h.uid == ((uid_t)-1) ||
+ req->in.h.gid == ((gid_t)-1))) {
fuse_put_request(req);
return ERR_PTR(-EOVERFLOW);
}
+
return req;
out:
@@ -192,14 +299,25 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
}
EXPORT_SYMBOL_GPL(fuse_len_args);
-u64 fuse_get_unique(struct fuse_iqueue *fiq)
+static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
{
fiq->reqctr += FUSE_REQ_ID_STEP;
return fiq->reqctr;
}
+
+u64 fuse_get_unique(struct fuse_iqueue *fiq)
+{ /* Locking wrapper: returns the next request ID generated for @fiq. */
+ u64 ret;
+
+ spin_lock(&fiq->lock); /* fuse_get_unique_locked() advances fiq->reqctr */
+ ret = fuse_get_unique_locked(fiq);
+ spin_unlock(&fiq->lock);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(fuse_get_unique);
-static unsigned int fuse_req_hash(u64 unique)
+unsigned int fuse_req_hash(u64 unique)
{
return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
}
@@ -215,22 +333,71 @@ __releases(fiq->lock)
spin_unlock(&fiq->lock);
}
+void fuse_dev_queue_forget(struct fuse_iqueue *fiq,
+ struct fuse_forget_link *forget)
+{ /* Append @forget to the forget list, or free it if @fiq is not connected. */
+ spin_lock(&fiq->lock);
+ if (fiq->connected) {
+ fiq->forget_list_tail->next = forget; /* link after the current tail */
+ fiq->forget_list_tail = forget;
+ fuse_dev_wake_and_unlock(fiq); /* expected to release fiq->lock (no explicit unlock on this path) */
+ } else {
+ kfree(forget); /* not queued, so it is freed here */
+ spin_unlock(&fiq->lock);
+ }
+}
+
+void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
+{ /* Queue @req on fiq->interrupts unless it is already queued or has finished. */
+ spin_lock(&fiq->lock);
+ if (list_empty(&req->intr_entry)) { /* not on an interrupt list yet */
+ list_add_tail(&req->intr_entry, &fiq->interrupts);
+ /*
+ * Pairs with smp_mb() implied by test_and_set_bit()
+ * from fuse_request_end().
+ */
+ smp_mb();
+ if (test_bit(FR_FINISHED, &req->flags)) {
+ list_del_init(&req->intr_entry); /* already finished: undo the queuing */
+ spin_unlock(&fiq->lock);
+ } else {
+ fuse_dev_wake_and_unlock(fiq);
+ }
+ } else {
+ spin_unlock(&fiq->lock); /* already queued: nothing to do */
+ }
+}
+
+static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
+{ /* Queue @req on fiq->pending, or end it with -ENOTCONN if @fiq is not connected. */
+ spin_lock(&fiq->lock);
+ if (fiq->connected) {
+ if (req->in.h.opcode != FUSE_NOTIFY_REPLY) /* notify replies keep the unique value already set */
+ req->in.h.unique = fuse_get_unique_locked(fiq);
+ list_add_tail(&req->list, &fiq->pending);
+ fuse_dev_wake_and_unlock(fiq);
+ } else {
+ spin_unlock(&fiq->lock);
+ req->out.h.error = -ENOTCONN;
+ clear_bit(FR_PENDING, &req->flags); /* the request was never queued */
+ fuse_request_end(req);
+ }
+}
+
const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
- .wake_forget_and_unlock = fuse_dev_wake_and_unlock,
- .wake_interrupt_and_unlock = fuse_dev_wake_and_unlock,
- .wake_pending_and_unlock = fuse_dev_wake_and_unlock,
+ .send_forget = fuse_dev_queue_forget,
+ .send_interrupt = fuse_dev_queue_interrupt,
+ .send_req = fuse_dev_queue_req,
};
EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
-static void queue_request_and_unlock(struct fuse_iqueue *fiq,
- struct fuse_req *req)
-__releases(fiq->lock)
+static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
{
req->in.h.len = sizeof(struct fuse_in_header) +
fuse_len_args(req->args->in_numargs,
(struct fuse_arg *) req->args->in_args);
- list_add_tail(&req->list, &fiq->pending);
- fiq->ops->wake_pending_and_unlock(fiq);
+ trace_fuse_request_send(req);
+ fiq->ops->send_req(fiq, req);
}
void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
@@ -241,15 +408,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
forget->forget_one.nodeid = nodeid;
forget->forget_one.nlookup = nlookup;
- spin_lock(&fiq->lock);
- if (fiq->connected) {
- fiq->forget_list_tail->next = forget;
- fiq->forget_list_tail = forget;
- fiq->ops->wake_forget_and_unlock(fiq);
- } else {
- kfree(forget);
- spin_unlock(&fiq->lock);
- }
+ fiq->ops->send_forget(fiq, forget);
}
static void flush_bg_queue(struct fuse_conn *fc)
@@ -263,9 +422,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
list_del(&req->list);
fc->active_background++;
- spin_lock(&fiq->lock);
- req->in.h.unique = fuse_get_unique(fiq);
- queue_request_and_unlock(fiq, req);
+ fuse_send_one(fiq, req);
}
}
@@ -286,6 +443,7 @@ void fuse_request_end(struct fuse_req *req)
if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request;
+ trace_fuse_request_end(req);
/*
* test_and_set_bit() implies smp_mb() between bit
* changing and below FR_INTERRUPTED check. Pairs with
@@ -335,30 +493,31 @@ static int queue_interrupt(struct fuse_req *req)
{
struct fuse_iqueue *fiq = &req->fm->fc->iq;
- spin_lock(&fiq->lock);
/* Check that we've sent a request to interrupt this req */
- if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
- spin_unlock(&fiq->lock);
+ if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
return -EINVAL;
- }
- if (list_empty(&req->intr_entry)) {
- list_add_tail(&req->intr_entry, &fiq->interrupts);
+ fiq->ops->send_interrupt(fiq, req);
+
+ return 0;
+}
+
+bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock)
+{
+ spin_lock(lock);
+ if (test_bit(FR_PENDING, &req->flags)) {
/*
- * Pairs with smp_mb() implied by test_and_set_bit()
- * from fuse_request_end().
+ * FR_PENDING does not get cleared as the request will end
+ * up in destruction anyway.
*/
- smp_mb();
- if (test_bit(FR_FINISHED, &req->flags)) {
- list_del_init(&req->intr_entry);
- spin_unlock(&fiq->lock);
- return 0;
- }
- fiq->ops->wake_interrupt_and_unlock(fiq);
- } else {
- spin_unlock(&fiq->lock);
+ list_del(&req->list);
+ spin_unlock(lock);
+ __fuse_put_request(req);
+ req->out.h.error = -EINTR;
+ return true;
}
- return 0;
+ spin_unlock(lock);
+ return false;
}
static void request_wait_answer(struct fuse_req *req)
@@ -382,22 +541,20 @@ static void request_wait_answer(struct fuse_req *req)
}
if (!test_bit(FR_FORCE, &req->flags)) {
+ bool removed;
+
/* Only fatal signals may interrupt this */
err = wait_event_killable(req->waitq,
test_bit(FR_FINISHED, &req->flags));
if (!err)
return;
- spin_lock(&fiq->lock);
- /* Request is not yet in userspace, bail out */
- if (test_bit(FR_PENDING, &req->flags)) {
- list_del(&req->list);
- spin_unlock(&fiq->lock);
- __fuse_put_request(req);
- req->out.h.error = -EINTR;
+ if (test_bit(FR_URING, &req->flags))
+ removed = fuse_uring_remove_pending_req(req);
+ else
+ removed = fuse_remove_pending_req(req, &fiq->lock);
+ if (removed)
return;
- }
- spin_unlock(&fiq->lock);
}
/*
@@ -412,21 +569,15 @@ static void __fuse_request_send(struct fuse_req *req)
struct fuse_iqueue *fiq = &req->fm->fc->iq;
BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
- spin_lock(&fiq->lock);
- if (!fiq->connected) {
- spin_unlock(&fiq->lock);
- req->out.h.error = -ENOTCONN;
- } else {
- req->in.h.unique = fuse_get_unique(fiq);
- /* acquire extra reference, since request is still needed
- after fuse_request_end() */
- __fuse_get_request(req);
- queue_request_and_unlock(fiq, req);
- request_wait_answer(req);
- /* Pairs with smp_wmb() in fuse_request_end() */
- smp_rmb();
- }
+ /* acquire extra reference, since request is still needed after
+ fuse_request_end() */
+ __fuse_get_request(req);
+ fuse_send_one(fiq, req);
+
+ request_wait_answer(req);
+ /* Pairs with smp_wmb() in fuse_request_end() */
+ smp_rmb();
}
static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
@@ -466,8 +617,14 @@ static void fuse_force_creds(struct fuse_req *req)
{
struct fuse_conn *fc = req->fm->fc;
- req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
- req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
+ if (!req->fm->sb || req->fm->sb->s_iflags & SB_I_NOIDMAP) {
+ req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
+ req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
+ } else {
+ req->in.h.uid = FUSE_INVALID_UIDGID;
+ req->in.h.gid = FUSE_INVALID_UIDGID;
+ }
+
req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
}
@@ -482,7 +639,9 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
__set_bit(FR_ASYNC, &req->flags);
}
-ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
+ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
+ struct fuse_mount *fm,
+ struct fuse_args *args)
{
struct fuse_conn *fc = fm->fc;
struct fuse_req *req;
@@ -499,7 +658,7 @@ ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
__set_bit(FR_FORCE, &req->flags);
} else {
WARN_ON(args->nocreds);
- req = fuse_get_req(fm, false);
+ req = fuse_get_req(idmap, fm, false);
if (IS_ERR(req))
return PTR_ERR(req);
}
@@ -521,7 +680,25 @@ ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
return ret;
}
-static bool fuse_request_queue_background(struct fuse_req *req)
+#ifdef CONFIG_FUSE_IO_URING
+static bool fuse_request_queue_background_uring(struct fuse_conn *fc,
+ struct fuse_req *req)
+{ /* Fill in the unique ID and header length, then pass @req to fuse_uring_queue_bq_req(). */
+ struct fuse_iqueue *fiq = &fc->iq;
+
+ req->in.h.unique = fuse_get_unique(fiq); /* takes fiq->lock internally */
+ req->in.h.len = sizeof(struct fuse_in_header) +
+ fuse_len_args(req->args->in_numargs,
+ (struct fuse_arg *) req->args->in_args);
+
+ return fuse_uring_queue_bq_req(req); /* result is returned unchanged */
+}
+#endif
+
+/*
+ * @return true if queued
+ */
+static int fuse_request_queue_background(struct fuse_req *req)
{
struct fuse_mount *fm = req->fm;
struct fuse_conn *fc = fm->fc;
@@ -533,6 +710,12 @@ static bool fuse_request_queue_background(struct fuse_req *req)
atomic_inc(&fc->num_waiting);
}
__set_bit(FR_ISREPLY, &req->flags);
+
+#ifdef CONFIG_FUSE_IO_URING
+ if (fuse_uring_ready(fc))
+ return fuse_request_queue_background_uring(fc, req);
+#endif
+
spin_lock(&fc->bg_lock);
if (likely(fc->connected)) {
fc->num_background++;
@@ -560,7 +743,7 @@ int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
__set_bit(FR_BACKGROUND, &req->flags);
} else {
WARN_ON(args->nocreds);
- req = fuse_get_req(fm, true);
+ req = fuse_get_req(&invalid_mnt_idmap, fm, true);
if (IS_ERR(req))
return PTR_ERR(req);
}
@@ -581,9 +764,8 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
{
struct fuse_req *req;
struct fuse_iqueue *fiq = &fm->fc->iq;
- int err = 0;
- req = fuse_get_req(fm, false);
+ req = fuse_get_req(&invalid_mnt_idmap, fm, false);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -592,16 +774,9 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
fuse_args_to_req(req, args);
- spin_lock(&fiq->lock);
- if (fiq->connected) {
- queue_request_and_unlock(fiq, req);
- } else {
- err = -ENODEV;
- spin_unlock(&fiq->lock);
- fuse_put_request(req);
- }
+ fuse_send_one(fiq, req);
- return err;
+ return 0;
}
/*
@@ -641,22 +816,8 @@ static int unlock_request(struct fuse_req *req)
return err;
}
-struct fuse_copy_state {
- int write;
- struct fuse_req *req;
- struct iov_iter *iter;
- struct pipe_buffer *pipebufs;
- struct pipe_buffer *currbuf;
- struct pipe_inode_info *pipe;
- unsigned long nr_segs;
- struct page *pg;
- unsigned len;
- unsigned offset;
- unsigned move_pages:1;
-};
-
-static void fuse_copy_init(struct fuse_copy_state *cs, int write,
- struct iov_iter *iter)
+void fuse_copy_init(struct fuse_copy_state *cs, int write,
+ struct iov_iter *iter)
{
memset(cs, 0, sizeof(*cs));
cs->write = write;
@@ -763,6 +924,9 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
*size -= ncpy;
cs->len -= ncpy;
cs->offset += ncpy;
+ if (cs->is_uring)
+ cs->ring.copied_sz += ncpy;
+
return ncpy;
}
@@ -773,7 +937,6 @@ static int fuse_check_folio(struct folio *folio)
(folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
- 1 << PG_uptodate |
1 << PG_lru |
1 << PG_active |
1 << PG_workingset |
@@ -786,6 +949,12 @@ static int fuse_check_folio(struct folio *folio)
return 0;
}
+/*
+ * Attempt to steal a page from the splice() pipe and move it into the
+ * pagecache. If successful, the pointer in @pagep will be updated. The
+ * folio that was originally in @pagep will lose a reference and the new
+ * folio returned in @pagep will carry a reference.
+ */
static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
int err;
@@ -818,9 +987,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
newfolio = page_folio(buf->page);
- if (!folio_test_uptodate(newfolio))
- folio_mark_uptodate(newfolio);
-
+ folio_clear_uptodate(newfolio);
folio_clear_mappedtodisk(newfolio);
if (fuse_check_folio(newfolio) != 0)
@@ -980,17 +1147,27 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
struct fuse_req *req = cs->req;
struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
-
- for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
+ for (i = 0; i < ap->num_folios && (nbytes || zeroing); i++) {
int err;
unsigned int offset = ap->descs[i].offset;
unsigned int count = min(nbytes, ap->descs[i].length);
+ struct page *orig, *pagep;
- err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
+ orig = pagep = &ap->folios[i]->page;
+
+ err = fuse_copy_page(cs, &pagep, offset, count, zeroing);
if (err)
return err;
nbytes -= count;
+
+ /*
+ * fuse_copy_page may have moved a page from a pipe instead of
+ * copying into our given page, so update the folios if it was
+ * replaced.
+ */
+ if (pagep != orig)
+ ap->folios[i] = page_folio(pagep);
}
return 0;
}
@@ -1010,9 +1187,9 @@ static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
}
/* Copy request arguments to/from userspace buffer */
-static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
- unsigned argpages, struct fuse_arg *args,
- int zeroing)
+int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
+ unsigned argpages, struct fuse_arg *args,
+ int zeroing)
{
int err = 0;
unsigned i;
@@ -1076,9 +1253,9 @@ __releases(fiq->lock)
return err ? err : reqsize;
}
-struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
- unsigned int max,
- unsigned int *countp)
+static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
+ unsigned int max,
+ unsigned int *countp)
{
struct fuse_forget_link *head = fiq->forget_list_head.next;
struct fuse_forget_link **newhead = &head;
@@ -1097,7 +1274,6 @@ struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
return head;
}
-EXPORT_SYMBOL(fuse_dequeue_forget);
static int fuse_read_single_forget(struct fuse_iqueue *fiq,
struct fuse_copy_state *cs,
@@ -1112,7 +1288,7 @@ __releases(fiq->lock)
struct fuse_in_header ih = {
.opcode = FUSE_FORGET,
.nodeid = forget->forget_one.nodeid,
- .unique = fuse_get_unique(fiq),
+ .unique = fuse_get_unique_locked(fiq),
.len = sizeof(ih) + sizeof(arg),
};
@@ -1143,7 +1319,7 @@ __releases(fiq->lock)
struct fuse_batch_forget_in arg = { .count = 0 };
struct fuse_in_header ih = {
.opcode = FUSE_BATCH_FORGET,
- .unique = fuse_get_unique(fiq),
+ .unique = fuse_get_unique_locked(fiq),
.len = sizeof(ih) + sizeof(arg),
};
@@ -1392,7 +1568,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0)
goto out;
- if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
+ if (pipe_buf_usage(pipe) + cs.nr_segs > pipe->max_usage) {
ret = -EIO;
goto out;
}
@@ -1468,14 +1644,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_inval_entry_out outarg;
- int err = -ENOMEM;
- char *buf;
+ int err;
+ char *buf = NULL;
struct qstr name;
- buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
- if (!buf)
- goto err;
-
err = -EINVAL;
if (size < sizeof(outarg))
goto err;
@@ -1485,13 +1657,18 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
goto err;
err = -ENAMETOOLONG;
- if (outarg.namelen > FUSE_NAME_MAX)
+ if (outarg.namelen > fc->name_max)
goto err;
err = -EINVAL;
if (size != sizeof(outarg) + outarg.namelen + 1)
goto err;
+ err = -ENOMEM;
+ buf = kzalloc(outarg.namelen + 1, GFP_KERNEL);
+ if (!buf)
+ goto err;
+
name.name = buf;
name.len = outarg.namelen;
err = fuse_copy_one(cs, buf, outarg.namelen + 1);
@@ -1516,14 +1693,10 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_delete_out outarg;
- int err = -ENOMEM;
- char *buf;
+ int err;
+ char *buf = NULL;
struct qstr name;
- buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
- if (!buf)
- goto err;
-
err = -EINVAL;
if (size < sizeof(outarg))
goto err;
@@ -1533,13 +1706,18 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
goto err;
err = -ENAMETOOLONG;
- if (outarg.namelen > FUSE_NAME_MAX)
+ if (outarg.namelen > fc->name_max)
goto err;
err = -EINVAL;
if (size != sizeof(outarg) + outarg.namelen + 1)
goto err;
+ err = -ENOMEM;
+ buf = kzalloc(outarg.namelen + 1, GFP_KERNEL);
+ if (!buf)
+ goto err;
+
name.name = buf;
name.len = outarg.namelen;
err = fuse_copy_one(cs, buf, outarg.namelen + 1);
@@ -1607,22 +1785,25 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
num = outarg.size;
while (num) {
+ struct folio *folio;
struct page *page;
unsigned int this_num;
- err = -ENOMEM;
- page = find_or_create_page(mapping, index,
- mapping_gfp_mask(mapping));
- if (!page)
+ folio = filemap_grab_folio(mapping, index);
+ err = PTR_ERR(folio);
+ if (IS_ERR(folio))
goto out_iput;
- this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+ page = &folio->page;
+ this_num = min_t(unsigned, num, folio_size(folio) - offset);
err = fuse_copy_page(cs, &page, offset, this_num, 0);
- if (!err && offset == 0 &&
- (this_num == PAGE_SIZE || file_size == end))
- SetPageUptodate(page);
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio) && !err && offset == 0 &&
+ (this_num == folio_size(folio) || file_size == end)) {
+ folio_zero_segment(folio, this_num, folio_size(folio));
+ folio_mark_uptodate(folio);
+ }
+ folio_unlock(folio);
+ folio_put(folio);
if (err)
goto out_iput;
@@ -1654,7 +1835,7 @@ static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_retrieve_args *ra =
container_of(args, typeof(*ra), ap.args);
- release_pages(ra->ap.pages, ra->ap.num_pages);
+ release_pages(ra->ap.folios, ra->ap.num_folios);
kfree(ra);
}
@@ -1668,7 +1849,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
- unsigned int num_pages;
+ unsigned int num_pages, cur_pages = 0;
struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
@@ -1687,38 +1868,39 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, fc->max_pages);
- args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
+ args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
ra = kzalloc(args_size, GFP_KERNEL);
if (!ra)
return -ENOMEM;
ap = &ra->ap;
- ap->pages = (void *) (ra + 1);
- ap->descs = (void *) (ap->pages + num_pages);
+ ap->folios = (void *) (ra + 1);
+ ap->descs = (void *) (ap->folios + num_pages);
args = &ap->args;
args->nodeid = outarg->nodeid;
args->opcode = FUSE_NOTIFY_REPLY;
- args->in_numargs = 2;
+ args->in_numargs = 3;
args->in_pages = true;
args->end = fuse_retrieve_end;
index = outarg->offset >> PAGE_SHIFT;
- while (num && ap->num_pages < num_pages) {
- struct page *page;
+ while (num && cur_pages < num_pages) {
+ struct folio *folio;
unsigned int this_num;
- page = find_get_page(mapping, index);
- if (!page)
+ folio = filemap_get_folio(mapping, index);
+ if (IS_ERR(folio))
break;
this_num = min_t(unsigned, num, PAGE_SIZE - offset);
- ap->pages[ap->num_pages] = page;
- ap->descs[ap->num_pages].offset = offset;
- ap->descs[ap->num_pages].length = this_num;
- ap->num_pages++;
+ ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].offset = offset;
+ ap->descs[ap->num_folios].length = this_num;
+ ap->num_folios++;
+ cur_pages++;
offset = 0;
num -= this_num;
@@ -1727,9 +1909,10 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
}
ra->inarg.offset = outarg->offset;
ra->inarg.size = total_len;
- args->in_args[0].size = sizeof(ra->inarg);
- args->in_args[0].value = &ra->inarg;
- args->in_args[1].size = total_len;
+ fuse_set_zero_arg0(args);
+ args->in_args[1].size = sizeof(ra->inarg);
+ args->in_args[1].value = &ra->inarg;
+ args->in_args[2].size = total_len;
err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
if (err)
@@ -1813,15 +1996,23 @@ static void fuse_resend(struct fuse_conn *fc)
spin_unlock(&fc->lock);
list_for_each_entry_safe(req, next, &to_queue, list) {
- __set_bit(FR_PENDING, &req->flags);
+ set_bit(FR_PENDING, &req->flags);
+ clear_bit(FR_SENT, &req->flags);
/* mark the request as resend request */
req->in.h.unique |= FUSE_UNIQUE_RESEND;
}
spin_lock(&fiq->lock);
+ if (!fiq->connected) {
+ spin_unlock(&fiq->lock);
+ list_for_each_entry(req, &to_queue, list)
+ clear_bit(FR_PENDING, &req->flags);
+ fuse_dev_end_requests(&to_queue);
+ return;
+ }
/* iq and pq requests are both oldest to newest */
list_splice(&to_queue, &fiq->pending);
- fiq->ops->wake_pending_and_unlock(fiq);
+ fuse_dev_wake_and_unlock(fiq);
}
static int fuse_notify_resend(struct fuse_conn *fc)
@@ -1865,7 +2056,7 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
}
/* Look up request on processing list by unique ID */
-static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
+struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique)
{
unsigned int hash = fuse_req_hash(unique);
struct fuse_req *req;
@@ -1877,10 +2068,17 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
return NULL;
}
-static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
- unsigned nbytes)
+int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
+ unsigned nbytes)
{
- unsigned reqsize = sizeof(struct fuse_out_header);
+
+ unsigned int reqsize = 0;
+
+ /*
+ * Uring has all headers separated from args - args is payload only
+ */
+ if (!cs->is_uring)
+ reqsize = sizeof(struct fuse_out_header);
reqsize += fuse_len_args(args->out_numargs, args->out_args);
@@ -1942,7 +2140,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
spin_lock(&fpq->lock);
req = NULL;
if (fpq->connected)
- req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
+ req = fuse_request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
err = -ENOENT;
if (!req) {
@@ -1980,7 +2178,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
if (oh.error)
err = nbytes != sizeof(oh) ? -EINVAL : 0;
else
- err = copy_out_args(cs, req->args, nbytes);
+ err = fuse_copy_out_args(cs, req->args, nbytes);
fuse_copy_finish(cs);
spin_lock(&fpq->lock);
@@ -2022,7 +2220,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{
- unsigned int head, tail, mask, count;
+ unsigned int head, tail, count;
unsigned nbuf;
unsigned idx;
struct pipe_buffer *bufs;
@@ -2039,8 +2237,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
head = pipe->head;
tail = pipe->tail;
- mask = pipe->ring_size - 1;
- count = head - tail;
+ count = pipe_occupancy(head, tail);
bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs) {
@@ -2050,8 +2247,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
nbuf = 0;
rem = 0;
- for (idx = tail; idx != head && rem < len; idx++)
- rem += pipe->bufs[idx & mask].len;
+ for (idx = tail; !pipe_empty(head, idx) && rem < len; idx++)
+ rem += pipe_buf(pipe, idx)->len;
ret = -EINVAL;
if (rem < len)
@@ -2062,10 +2259,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct pipe_buffer *ibuf;
struct pipe_buffer *obuf;
- if (WARN_ON(nbuf >= count || tail == head))
+ if (WARN_ON(nbuf >= count || pipe_empty(head, tail)))
goto out_free;
- ibuf = &pipe->bufs[tail & mask];
+ ibuf = pipe_buf(pipe, tail);
obuf = &bufs[nbuf];
if (rem >= ibuf->len) {
@@ -2135,7 +2332,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
}
/* Abort all requests on the given list (pending or processing) */
-static void end_requests(struct list_head *head)
+void fuse_dev_end_requests(struct list_head *head)
{
while (!list_empty(head)) {
struct fuse_req *req;
@@ -2191,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
LIST_HEAD(to_end);
unsigned int i;
+ if (fc->timeout.req_timeout)
+ cancel_delayed_work(&fc->timeout.work);
+
/* Background queuing checks fc->connected under bg_lock */
spin_lock(&fc->bg_lock);
fc->connected = 0;
@@ -2238,7 +2438,13 @@ void fuse_abort_conn(struct fuse_conn *fc)
wake_up_all(&fc->blocked_waitq);
spin_unlock(&fc->lock);
- end_requests(&to_end);
+ fuse_dev_end_requests(&to_end);
+
+ /*
+ * fc->lock has been dropped above: it must not be held while
+ * calling fuse_uring_abort(), to avoid conflicts with io-uring locks
+ */
+ fuse_uring_abort(fc);
} else {
spin_unlock(&fc->lock);
}
@@ -2250,6 +2456,8 @@ void fuse_wait_aborted(struct fuse_conn *fc)
/* matches implicit memory barrier in fuse_drop_waiting() */
smp_mb();
wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
+
+ fuse_uring_wait_stopped_queues(fc);
}
int fuse_dev_release(struct inode *inode, struct file *file)
@@ -2268,7 +2476,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
list_splice_init(&fpq->processing[i], &to_end);
spin_unlock(&fpq->lock);
- end_requests(&to_end);
+ fuse_dev_end_requests(&to_end);
/* Are we the last open device? */
if (atomic_dec_and_test(&fc->dev_count)) {
@@ -2314,21 +2522,20 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
int res;
int oldfd;
struct fuse_dev *fud = NULL;
- struct fd f;
if (get_user(oldfd, argp))
return -EFAULT;
- f = fdget(oldfd);
- if (!f.file)
+ CLASS(fd, f)(oldfd);
+ if (fd_empty(f))
return -EINVAL;
/*
* Check against file->f_op because CUSE
* uses the same ioctl handler.
*/
- if (f.file->f_op == file->f_op)
- fud = fuse_get_dev(f.file);
+ if (fd_file(f)->f_op == file->f_op)
+ fud = fuse_get_dev(fd_file(f));
res = -EINVAL;
if (fud) {
@@ -2337,7 +2544,6 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
mutex_unlock(&fuse_mutex);
}
- fdput(f);
return res;
}
@@ -2399,7 +2605,6 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.open = fuse_dev_open,
- .llseek = no_llseek,
.read_iter = fuse_dev_read,
.splice_read = fuse_dev_splice_read,
.write_iter = fuse_dev_write,
@@ -2409,6 +2614,9 @@ const struct file_operations fuse_dev_operations = {
.fasync = fuse_dev_fasync,
.unlocked_ioctl = fuse_dev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
+#ifdef CONFIG_FUSE_IO_URING
+ .uring_cmd = fuse_uring_cmd,
+#endif
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);