summaryrefslogtreecommitdiff
path: root/fs/fuse/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fuse/file.c')
-rw-r--r--fs/fuse/file.c1860
1 files changed, 904 insertions, 956 deletions
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index bc4115288eec..01bc894e9c2b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -19,7 +19,9 @@
#include <linux/uio.h>
#include <linux/fs.h>
#include <linux/filelock.h>
-#include <linux/file.h>
+#include <linux/splice.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/iomap.h>
static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
unsigned int open_flags, int opcode,
@@ -50,13 +52,7 @@ static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
return fuse_simple_request(fm, &args);
}
-struct fuse_release_args {
- struct fuse_args args;
- struct fuse_release_in inarg;
- struct inode *inode;
-};
-
-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release)
{
struct fuse_file *ff;
@@ -65,15 +61,15 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
return NULL;
ff->fm = fm;
- ff->release_args = kzalloc(sizeof(*ff->release_args),
- GFP_KERNEL_ACCOUNT);
- if (!ff->release_args) {
- kfree(ff);
- return NULL;
+ if (release) {
+ ff->args = kzalloc(sizeof(*ff->args), GFP_KERNEL_ACCOUNT);
+ if (!ff->args) {
+ kfree(ff);
+ return NULL;
+ }
}
INIT_LIST_HEAD(&ff->write_entry);
- mutex_init(&ff->readdir.lock);
refcount_set(&ff->count, 1);
RB_CLEAR_NODE(&ff->polled_node);
init_waitqueue_head(&ff->poll_wait);
@@ -85,8 +81,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
void fuse_file_free(struct fuse_file *ff)
{
- kfree(ff->release_args);
- mutex_destroy(&ff->readdir.lock);
+ kfree(ff->args);
kfree(ff);
}
@@ -105,13 +100,18 @@ static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
kfree(ra);
}
-static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
{
if (refcount_dec_and_test(&ff->count)) {
- struct fuse_args *args = &ff->release_args->args;
+ struct fuse_release_args *ra = &ff->args->release_args;
+ struct fuse_args *args = (ra ? &ra->args : NULL);
+
+ if (ra && ra->inode)
+ fuse_file_io_release(ff, ra->inode);
- if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
- /* Do nothing when client does not implement 'open' */
+ if (!args) {
+ /* Do nothing when server does not implement 'opendir' */
+ } else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) {
fuse_release_end(ff->fm, args, 0);
} else if (sync) {
fuse_simple_request(ff->fm, args);
@@ -132,31 +132,45 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
struct fuse_conn *fc = fm->fc;
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+ bool open = isdir ? !fc->no_opendir : !fc->no_open;
+ bool release = !isdir || open;
- ff = fuse_file_alloc(fm);
+ /*
+ * ff->args->release_args still needs to be allocated (so we can hold an
+ * inode reference while there are pending inflight file operations when
+ * ->release() is called, see fuse_prepare_release()) even if
+ * fc->no_open is set else it becomes possible for reclaim to deadlock
+ * if while servicing the readahead request the server triggers reclaim
+ * and reclaim evicts the inode of the file being read ahead.
+ */
+ ff = fuse_file_alloc(fm, release);
if (!ff)
return ERR_PTR(-ENOMEM);
ff->fh = 0;
/* Default for no-open */
ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
- if (isdir ? !fc->no_opendir : !fc->no_open) {
- struct fuse_open_out outarg;
+ if (open) {
+ /* Store outarg for fuse_finish_open() */
+ struct fuse_open_out *outargp = &ff->args->open_outarg;
int err;
- err = fuse_send_open(fm, nodeid, open_flags, opcode, &outarg);
+ err = fuse_send_open(fm, nodeid, open_flags, opcode, outargp);
if (!err) {
- ff->fh = outarg.fh;
- ff->open_flags = outarg.open_flags;
-
+ ff->fh = outargp->fh;
+ ff->open_flags = outargp->open_flags;
} else if (err != -ENOSYS) {
fuse_file_free(ff);
return ERR_PTR(err);
} else {
- if (isdir)
+ if (isdir) {
+ /* No release needed */
+ kfree(ff->args);
+ ff->args = NULL;
fc->no_opendir = 1;
- else
+ } else {
fc->no_open = 1;
+ }
}
}
@@ -195,40 +209,50 @@ static void fuse_link_write_file(struct file *file)
spin_unlock(&fi->lock);
}
-void fuse_finish_open(struct inode *inode, struct file *file)
+int fuse_finish_open(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ err = fuse_file_io_open(file, inode);
+ if (err)
+ return err;
if (ff->open_flags & FOPEN_STREAM)
stream_open(inode, file);
else if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
- if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- spin_lock(&fi->lock);
- fi->attr_version = atomic64_inc_return(&fc->attr_version);
- i_size_write(inode, 0);
- spin_unlock(&fi->lock);
- file_update_time(file);
- fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
- }
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
+
+ return 0;
}
-int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+static void fuse_truncate_update_attr(struct inode *inode, struct file *file)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ i_size_write(inode, 0);
+ spin_unlock(&fi->lock);
+ file_update_time(file);
+ fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
+}
+
+static int fuse_open(struct inode *inode, struct file *file)
{
struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = fm->fc;
+ struct fuse_file *ff;
int err;
- bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
- fc->atomic_o_trunc &&
- fc->writeback_cache;
- bool dax_truncate = (file->f_flags & O_TRUNC) &&
- fc->atomic_o_trunc && FUSE_IS_DAX(inode);
+ bool is_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc;
+ bool is_wb_truncate = is_truncate && fc->writeback_cache;
+ bool dax_truncate = is_truncate && FUSE_IS_DAX(inode);
if (fuse_is_bad(inode))
return -EIO;
@@ -242,7 +266,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err)
goto out_inode_unlock;
}
@@ -250,16 +274,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (is_wb_truncate || dax_truncate)
fuse_set_nowrite(inode);
- err = fuse_do_open(fm, get_node_id(inode), file, isdir);
- if (!err)
- fuse_finish_open(inode, file);
+ err = fuse_do_open(fm, get_node_id(inode), file, false);
+ if (!err) {
+ ff = file->private_data;
+ err = fuse_finish_open(inode, file);
+ if (err)
+ fuse_sync_release(fi, ff, file->f_flags);
+ else if (is_truncate)
+ fuse_truncate_update_attr(inode, file);
+ }
if (is_wb_truncate || dax_truncate)
fuse_release_nowrite(inode);
if (!err) {
- struct fuse_file *ff = file->private_data;
-
- if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+ if (is_truncate)
truncate_pagecache(inode, 0);
else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
invalidate_inode_pages2(inode->i_mapping);
@@ -274,10 +302,13 @@ out_inode_unlock:
}
static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
- unsigned int flags, int opcode)
+ unsigned int flags, int opcode, bool sync)
{
struct fuse_conn *fc = ff->fm->fc;
- struct fuse_release_args *ra = ff->release_args;
+ struct fuse_release_args *ra = &ff->args->release_args;
+
+ if (fuse_file_passthrough(ff))
+ fuse_passthrough_release(ff, fuse_inode_backing(fi));
/* Inode is NULL on error path of fuse_create_open() */
if (likely(fi)) {
@@ -292,6 +323,11 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
wake_up_interruptible_all(&ff->poll_wait);
+ if (!ra)
+ return;
+
+ /* ff->args was used for open outarg */
+ memset(ff->args, 0, sizeof(*ff->args));
ra->inarg.fh = ff->fh;
ra->inarg.flags = flags;
ra->args.in_numargs = 1;
@@ -301,23 +337,28 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
ra->args.nodeid = ff->nodeid;
ra->args.force = true;
ra->args.nocreds = true;
+
+ /*
+ * Hold inode until release is finished.
+ * From fuse_sync_release() the refcount is 1 and everything's
+ * synchronous, so we are fine with not doing igrab() here.
+ */
+ ra->inode = sync ? NULL : igrab(&fi->inode);
}
void fuse_file_release(struct inode *inode, struct fuse_file *ff,
unsigned int open_flags, fl_owner_t id, bool isdir)
{
struct fuse_inode *fi = get_fuse_inode(inode);
- struct fuse_release_args *ra = ff->release_args;
+ struct fuse_release_args *ra = &ff->args->release_args;
int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
- fuse_prepare_release(fi, ff, open_flags, opcode);
+ fuse_prepare_release(fi, ff, open_flags, opcode, false);
- if (ff->flock) {
+ if (ra && ff->flock) {
ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id);
}
- /* Hold inode until release is finished */
- ra->inode = igrab(inode);
/*
* Normally this will send the RELEASE request, however if
@@ -327,8 +368,14 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
* Make the release synchronous if this is a fuseblk mount,
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
+ *
+ * Always use the asynchronous file put because the current thread
+ * might be the fuse server. This can happen if a process starts some
+ * aio and closes the fd before the aio completes. Since aio takes its
+ * own ref to the file, the IO completion has to drop the ref, which is
+ * how the fuse server can end up closing its clients' files.
*/
- fuse_file_put(ff, ff->fm->fc->destroy, isdir);
+ fuse_file_put(ff, false);
}
void fuse_release_common(struct file *file, bool isdir)
@@ -337,11 +384,6 @@ void fuse_release_common(struct file *file, bool isdir)
(fl_owner_t) file, isdir);
}
-static int fuse_open(struct inode *inode, struct file *file)
-{
- return fuse_open_common(inode, file, false);
-}
-
static int fuse_release(struct inode *inode, struct file *file)
{
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -363,12 +405,8 @@ void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
unsigned int flags)
{
WARN_ON(refcount_read(&ff->count) > 1);
- fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
- /*
- * iput(NULL) is a no-op and since the refcount is 1 and everything's
- * synchronous, we are fine with not doing igrab() here"
- */
- fuse_file_put(ff, true, false);
+ fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true);
+ fuse_file_put(ff, true);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);
@@ -396,74 +434,11 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
struct fuse_writepage_args {
struct fuse_io_args ia;
- struct rb_node writepages_entry;
struct list_head queue_entry;
- struct fuse_writepage_args *next;
struct inode *inode;
struct fuse_sync_bucket *bucket;
};
-static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
- pgoff_t idx_from, pgoff_t idx_to)
-{
- struct rb_node *n;
-
- n = fi->writepages.rb_node;
-
- while (n) {
- struct fuse_writepage_args *wpa;
- pgoff_t curr_index;
-
- wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
- WARN_ON(get_fuse_inode(wpa->inode) != fi);
- curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
- if (idx_from >= curr_index + wpa->ia.ap.num_pages)
- n = n->rb_right;
- else if (idx_to < curr_index)
- n = n->rb_left;
- else
- return wpa;
- }
- return NULL;
-}
-
-/*
- * Check if any page in a range is under writeback
- *
- * This is currently done by walking the list of writepage requests
- * for the inode, which can be pretty inefficient.
- */
-static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
- pgoff_t idx_to)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
- bool found;
-
- spin_lock(&fi->lock);
- found = fuse_find_writeback(fi, idx_from, idx_to);
- spin_unlock(&fi->lock);
-
- return found;
-}
-
-static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
-{
- return fuse_range_is_writeback(inode, index, index);
-}
-
-/*
- * Wait for page writeback to be completed.
- *
- * Since fuse doesn't rely on the VM writeback tracking, this has to
- * use some other means.
- */
-static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
-}
-
/*
* Wait for all pending writepages on the inode to finish.
*
@@ -479,36 +454,44 @@ static void fuse_sync_writes(struct inode *inode)
fuse_release_nowrite(inode);
}
-struct fuse_flush_args {
- struct fuse_args args;
- struct fuse_flush_in inarg;
- struct work_struct work;
- struct file *file;
-};
-
-static int fuse_do_flush(struct fuse_flush_args *fa)
+static int fuse_flush(struct file *file, fl_owner_t id)
{
- int err;
- struct inode *inode = file_inode(fa->file);
+ struct inode *inode = file_inode(file);
struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_flush_in inarg;
+ FUSE_ARGS(args);
+ int err;
+
+ if (fuse_is_bad(inode))
+ return -EIO;
+
+ if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
+ return 0;
err = write_inode_now(inode, 1);
if (err)
- goto out;
-
- inode_lock(inode);
- fuse_sync_writes(inode);
- inode_unlock(inode);
+ return err;
- err = filemap_check_errors(fa->file->f_mapping);
+ err = filemap_check_errors(file->f_mapping);
if (err)
- goto out;
+ return err;
err = 0;
if (fm->fc->no_flush)
goto inval_attr_out;
- err = fuse_simple_request(fm, &fa->args);
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
+ args.opcode = FUSE_FLUSH;
+ args.nodeid = get_node_id(inode);
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.force = true;
+
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
fm->fc->no_flush = 1;
err = 0;
@@ -521,57 +504,9 @@ inval_attr_out:
*/
if (!err && fm->fc->writeback_cache)
fuse_invalidate_attr_mask(inode, STATX_BLOCKS);
-
-out:
- fput(fa->file);
- kfree(fa);
return err;
}
-static void fuse_flush_async(struct work_struct *work)
-{
- struct fuse_flush_args *fa = container_of(work, typeof(*fa), work);
-
- fuse_do_flush(fa);
-}
-
-static int fuse_flush(struct file *file, fl_owner_t id)
-{
- struct fuse_flush_args *fa;
- struct inode *inode = file_inode(file);
- struct fuse_mount *fm = get_fuse_mount(inode);
- struct fuse_file *ff = file->private_data;
-
- if (fuse_is_bad(inode))
- return -EIO;
-
- if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
- return 0;
-
- fa = kzalloc(sizeof(*fa), GFP_KERNEL);
- if (!fa)
- return -ENOMEM;
-
- fa->inarg.fh = ff->fh;
- fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
- fa->args.opcode = FUSE_FLUSH;
- fa->args.nodeid = get_node_id(inode);
- fa->args.in_numargs = 1;
- fa->args.in_args[0].size = sizeof(fa->inarg);
- fa->args.in_args[0].value = &fa->inarg;
- fa->args.force = true;
- fa->file = get_file(file);
-
- /* Don't wait if the task is exiting */
- if (current->flags & PF_EXITING) {
- INIT_WORK(&fa->work, fuse_flush_async);
- schedule_work(&fa->work);
- return 0;
- }
-
- return fuse_do_flush(fa);
-}
-
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
int datasync, int opcode)
{
@@ -662,16 +597,20 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
args->out_args[0].size = count;
}
-static void fuse_release_user_pages(struct fuse_args_pages *ap,
+static void fuse_release_user_pages(struct fuse_args_pages *ap, ssize_t nres,
bool should_dirty)
{
unsigned int i;
- for (i = 0; i < ap->num_pages; i++) {
+ for (i = 0; i < ap->num_folios; i++) {
if (should_dirty)
- set_page_dirty_lock(ap->pages[i]);
- put_page(ap->pages[i]);
+ folio_mark_dirty_lock(ap->folios[i]);
+ if (ap->args.is_pinned)
+ unpin_folio(ap->folios[i]);
}
+
+ if (nres > 0 && ap->args.invalidate_vmap)
+ invalidate_kernel_vmap_range(ap->args.vmap_base, nres);
}
static void fuse_io_release(struct kref *kref)
@@ -741,16 +680,16 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
}
static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
- unsigned int npages)
+ unsigned int nfolios)
{
struct fuse_io_args *ia;
ia = kzalloc(sizeof(*ia), GFP_KERNEL);
if (ia) {
ia->io = io;
- ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL,
- &ia->ap.descs);
- if (!ia->ap.pages) {
+ ia->ap.folios = fuse_folios_alloc(nfolios, GFP_KERNEL,
+ &ia->ap.descs);
+ if (!ia->ap.folios) {
kfree(ia);
ia = NULL;
}
@@ -760,7 +699,7 @@ static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io,
static void fuse_io_free(struct fuse_io_args *ia)
{
- kfree(ia->ap.pages);
+ kfree(ia->ap.folios);
kfree(ia);
}
@@ -770,25 +709,29 @@ static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_io_priv *io = ia->io;
ssize_t pos = -1;
-
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ size_t nres;
if (err) {
/* Nothing */
} else if (io->write) {
if (ia->write.out.size > ia->write.in.size) {
err = -EIO;
- } else if (ia->write.in.size != ia->write.out.size) {
- pos = ia->write.in.offset - io->offset +
- ia->write.out.size;
+ } else {
+ nres = ia->write.out.size;
+ if (ia->write.in.size != ia->write.out.size)
+ pos = ia->write.in.offset - io->offset +
+ ia->write.out.size;
}
} else {
u32 outsize = args->out_args[0].size;
+ nres = outsize;
if (ia->read.in.size != outsize)
pos = ia->read.in.offset - io->offset + outsize;
}
+ fuse_release_user_pages(&ia->ap, err ?: nres, io->should_dirty);
+
fuse_aio_complete(io, err, pos);
fuse_io_free(ia);
}
@@ -859,34 +802,31 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
* reached the client fs yet. So the hole is not present there.
*/
if (!fc->writeback_cache) {
- loff_t pos = page_offset(ap->pages[0]) + num_read;
+ loff_t pos = folio_pos(ap->folios[0]) + num_read;
fuse_read_update_size(inode, pos, attr_ver);
}
}
-static int fuse_do_readpage(struct file *file, struct page *page)
+static int fuse_do_readfolio(struct file *file, struct folio *folio,
+ size_t off, size_t len)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct fuse_mount *fm = get_fuse_mount(inode);
- loff_t pos = page_offset(page);
- struct fuse_page_desc desc = { .length = PAGE_SIZE };
+ loff_t pos = folio_pos(folio) + off;
+ struct fuse_folio_desc desc = {
+ .offset = off,
+ .length = len,
+ };
struct fuse_io_args ia = {
.ap.args.page_zeroing = true,
.ap.args.out_pages = true,
- .ap.num_pages = 1,
- .ap.pages = &page,
+ .ap.num_folios = 1,
+ .ap.folios = &folio,
.ap.descs = &desc,
};
ssize_t res;
u64 attr_ver;
- /*
- * Page writeback can extend beyond the lifetime of the
- * page-cache page, so make sure we read a properly synced
- * page.
- */
- fuse_wait_on_page_writeback(inode, page->index);
-
attr_ver = fuse_get_attr_version(fm->fc);
/* Don't overflow end offset */
@@ -903,26 +843,155 @@ static int fuse_do_readpage(struct file *file, struct page *page)
if (res < desc.length)
fuse_short_read(inode, attr_ver, res, &ia.ap);
- SetPageUptodate(page);
+ return 0;
+}
+
+static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ iomap->type = IOMAP_MAPPED;
+ iomap->length = length;
+ iomap->offset = offset;
+ return 0;
+}
+
+static const struct iomap_ops fuse_iomap_ops = {
+ .iomap_begin = fuse_iomap_begin,
+};
+
+struct fuse_fill_read_data {
+ struct file *file;
+
+ /* Fields below are used if sending the read request asynchronously */
+ struct fuse_conn *fc;
+ struct fuse_io_args *ia;
+ unsigned int nr_bytes;
+};
+
+/* forward declarations */
+static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
+ unsigned len, struct fuse_args_pages *ap,
+ unsigned cur_bytes, bool write);
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
+ unsigned int count, bool async);
+
+static int fuse_handle_readahead(struct folio *folio,
+ struct readahead_control *rac,
+ struct fuse_fill_read_data *data, loff_t pos,
+ size_t len)
+{
+ struct fuse_io_args *ia = data->ia;
+ size_t off = offset_in_folio(folio, pos);
+ struct fuse_conn *fc = data->fc;
+ struct fuse_args_pages *ap;
+ unsigned int nr_pages;
+
+ if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes,
+ false)) {
+ fuse_send_readpages(ia, data->file, data->nr_bytes,
+ fc->async_read);
+ data->nr_bytes = 0;
+ data->ia = NULL;
+ ia = NULL;
+ }
+ if (!ia) {
+ if (fc->num_background >= fc->congestion_threshold &&
+ rac->ra->async_size >= readahead_count(rac))
+ /*
+ * Congested and only async pages left, so skip the
+ * rest.
+ */
+ return -EAGAIN;
+
+ nr_pages = min(fc->max_pages, readahead_count(rac));
+ data->ia = fuse_io_alloc(NULL, nr_pages);
+ if (!data->ia)
+ return -ENOMEM;
+ ia = data->ia;
+ }
+ folio_get(folio);
+ ap = &ia->ap;
+ ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].offset = off;
+ ap->descs[ap->num_folios].length = len;
+ data->nr_bytes += len;
+ ap->num_folios++;
return 0;
}
+static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
+ struct iomap_read_folio_ctx *ctx,
+ size_t len)
+{
+ struct fuse_fill_read_data *data = ctx->read_ctx;
+ struct folio *folio = ctx->cur_folio;
+ loff_t pos = iter->pos;
+ size_t off = offset_in_folio(folio, pos);
+ struct file *file = data->file;
+ int ret;
+
+ if (ctx->rac) {
+ ret = fuse_handle_readahead(folio, ctx->rac, data, pos, len);
+ } else {
+ /*
+ * for non-readahead read requests, do reads synchronously
+ * since it's not guaranteed that the server can handle
+ * out-of-order reads
+ */
+ ret = fuse_do_readfolio(file, folio, off, len);
+ if (!ret)
+ iomap_finish_folio_read(folio, off, len, ret);
+ }
+ return ret;
+}
+
+static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx)
+{
+ struct fuse_fill_read_data *data = ctx->read_ctx;
+
+ if (data->ia)
+ fuse_send_readpages(data->ia, data->file, data->nr_bytes,
+ data->fc->async_read);
+}
+
+static const struct iomap_read_ops fuse_iomap_read_ops = {
+ .read_folio_range = fuse_iomap_read_folio_range_async,
+ .submit_read = fuse_iomap_read_submit,
+};
+
static int fuse_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
- int err;
+ struct inode *inode = folio->mapping->host;
+ struct fuse_fill_read_data data = {
+ .file = file,
+ };
+ struct iomap_read_folio_ctx ctx = {
+ .cur_folio = folio,
+ .ops = &fuse_iomap_read_ops,
+ .read_ctx = &data,
- err = -EIO;
- if (fuse_is_bad(inode))
- goto out;
+ };
- err = fuse_do_readpage(file, page);
+ if (fuse_is_bad(inode)) {
+ folio_unlock(folio);
+ return -EIO;
+ }
+
+ iomap_read_folio(&fuse_iomap_ops, &ctx);
fuse_invalidate_atime(inode);
- out:
- unlock_page(page);
- return err;
+ return 0;
+}
+
+static int fuse_iomap_read_folio_range(const struct iomap_iter *iter,
+ struct folio *folio, loff_t pos,
+ size_t len)
+{
+ struct file *file = iter->private;
+ size_t off = offset_in_folio(folio, pos);
+
+ return fuse_do_readfolio(file, folio, off, len);
}
static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
@@ -933,46 +1002,39 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_args_pages *ap = &ia->ap;
size_t count = ia->read.in.size;
size_t num_read = args->out_args[0].size;
- struct address_space *mapping = NULL;
-
- for (i = 0; mapping == NULL && i < ap->num_pages; i++)
- mapping = ap->pages[i]->mapping;
-
- if (mapping) {
- struct inode *inode = mapping->host;
+ struct address_space *mapping;
+ struct inode *inode;
- /*
- * Short read means EOF. If file size is larger, truncate it
- */
- if (!err && num_read < count)
- fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
+ WARN_ON_ONCE(!ap->num_folios);
+ mapping = ap->folios[0]->mapping;
+ inode = mapping->host;
- fuse_invalidate_atime(inode);
- }
+ /*
+ * Short read means EOF. If file size is larger, truncate it
+ */
+ if (!err && num_read < count)
+ fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
- for (i = 0; i < ap->num_pages; i++) {
- struct page *page = ap->pages[i];
+ fuse_invalidate_atime(inode);
- if (!err)
- SetPageUptodate(page);
- else
- SetPageError(page);
- unlock_page(page);
- put_page(page);
+ for (i = 0; i < ap->num_folios; i++) {
+ iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset,
+ ap->descs[i].length, err);
+ folio_put(ap->folios[i]);
}
if (ia->ff)
- fuse_file_put(ia->ff, false, false);
+ fuse_file_put(ia->ff, false);
fuse_io_free(ia);
}
-static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
+ unsigned int count, bool async)
{
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
struct fuse_args_pages *ap = &ia->ap;
- loff_t pos = page_offset(ap->pages[0]);
- size_t count = ap->num_pages << PAGE_SHIFT;
+ loff_t pos = folio_pos(ap->folios[0]);
ssize_t res;
int err;
@@ -983,13 +1045,13 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
/* Don't overflow end offset */
if (pos + (count - 1) == LLONG_MAX) {
count--;
- ap->descs[ap->num_pages - 1].length--;
+ ap->descs[ap->num_folios - 1].length--;
}
WARN_ON((loff_t) (pos + count) < 0);
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
ia->read.attr_ver = fuse_get_attr_version(fm->fc);
- if (fm->fc->async_read) {
+ if (async) {
ia->ff = fuse_file_get(ff);
ap->args.end = fuse_readpages_end;
err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
@@ -1006,44 +1068,20 @@ static void fuse_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
- unsigned int i, max_pages, nr_pages = 0;
+ struct fuse_fill_read_data data = {
+ .file = rac->file,
+ .fc = fc,
+ };
+ struct iomap_read_folio_ctx ctx = {
+ .ops = &fuse_iomap_read_ops,
+ .rac = rac,
+ .read_ctx = &data
+ };
if (fuse_is_bad(inode))
return;
- max_pages = min_t(unsigned int, fc->max_pages,
- fc->max_read / PAGE_SIZE);
-
- for (;;) {
- struct fuse_io_args *ia;
- struct fuse_args_pages *ap;
-
- if (fc->num_background >= fc->congestion_threshold &&
- rac->ra->async_size >= readahead_count(rac))
- /*
- * Congested and only async pages left, so skip the
- * rest.
- */
- break;
-
- nr_pages = readahead_count(rac) - nr_pages;
- if (nr_pages > max_pages)
- nr_pages = max_pages;
- if (nr_pages == 0)
- break;
- ia = fuse_io_alloc(NULL, nr_pages);
- if (!ia)
- return;
- ap = &ia->ap;
- nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
- for (i = 0; i < nr_pages; i++) {
- fuse_wait_on_page_writeback(inode,
- readahead_index(rac) + i);
- ap->descs[i].length = PAGE_SIZE;
- }
- ap->num_pages = nr_pages;
- fuse_send_readpages(ia, rac->file);
- }
+ iomap_readahead(&fuse_iomap_ops, &ctx);
}
static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -1159,8 +1197,8 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
bool short_write;
int err;
- for (i = 0; i < ap->num_pages; i++)
- fuse_wait_on_page_writeback(inode, ap->pages[i]->index);
+ for (i = 0; i < ap->num_folios; i++)
+ folio_wait_writeback(ap->folios[i]);
fuse_write_args_fill(ia, ff, pos, count);
ia->write.in.flags = fuse_write_flags(iocb);
@@ -1174,24 +1212,24 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
short_write = ia->write.out.size < count;
offset = ap->descs[0].offset;
count = ia->write.out.size;
- for (i = 0; i < ap->num_pages; i++) {
- struct page *page = ap->pages[i];
+ for (i = 0; i < ap->num_folios; i++) {
+ struct folio *folio = ap->folios[i];
if (err) {
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
} else {
- if (count >= PAGE_SIZE - offset)
- count -= PAGE_SIZE - offset;
+ if (count >= folio_size(folio) - offset)
+ count -= folio_size(folio) - offset;
else {
if (short_write)
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
count = 0;
}
offset = 0;
}
- if (ia->write.page_locked && (i == ap->num_pages - 1))
- unlock_page(page);
- put_page(page);
+ if (ia->write.folio_locked && (i == ap->num_folios - 1))
+ folio_unlock(folio);
+ folio_put(folio);
}
return err;
@@ -1200,73 +1238,84 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos,
- unsigned int max_pages)
+ unsigned int max_folios)
{
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
size_t count = 0;
- int err;
+ unsigned int num;
+ int err = 0;
+
+ num = min(iov_iter_count(ii), fc->max_write);
ap->args.in_pages = true;
- ap->descs[0].offset = offset;
- do {
+ while (num && ap->num_folios < max_folios) {
size_t tmp;
- struct page *page;
+ struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;
- size_t bytes = min_t(size_t, PAGE_SIZE - offset,
- iov_iter_count(ii));
-
- bytes = min_t(size_t, bytes, fc->max_write - count);
+ unsigned int bytes;
+ unsigned int folio_offset;
again:
- err = -EFAULT;
- if (fault_in_iov_iter_readable(ii, bytes))
- break;
-
- err = -ENOMEM;
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
break;
+ }
if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
- tmp = copy_page_from_iter_atomic(page, offset, bytes, ii);
- flush_dcache_page(page);
+ folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
+ bytes = min(folio_size(folio) - folio_offset, num);
+
+ tmp = copy_folio_from_iter_atomic(folio, folio_offset, bytes, ii);
+ flush_dcache_folio(folio);
if (!tmp) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
+
+ /*
+ * Ensure forward progress by faulting in
+ * while not holding the folio lock:
+ */
+ if (fault_in_iov_iter_readable(ii, bytes)) {
+ err = -EFAULT;
+ break;
+ }
+
goto again;
}
- err = 0;
- ap->pages[ap->num_pages] = page;
- ap->descs[ap->num_pages].length = tmp;
- ap->num_pages++;
+ ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].offset = folio_offset;
+ ap->descs[ap->num_folios].length = tmp;
+ ap->num_folios++;
count += tmp;
pos += tmp;
+ num -= tmp;
offset += tmp;
- if (offset == PAGE_SIZE)
+ if (offset == folio_size(folio))
offset = 0;
- /* If we copied full page, mark it uptodate */
- if (tmp == PAGE_SIZE)
- SetPageUptodate(page);
+ /* If we copied full folio, mark it uptodate */
+ if (tmp == folio_size(folio))
+ folio_mark_uptodate(folio);
- if (PageUptodate(page)) {
- unlock_page(page);
+ if (folio_test_uptodate(folio)) {
+ folio_unlock(folio);
} else {
- ia->write.page_locked = true;
+ ia->write.folio_locked = true;
break;
}
- if (!fc->big_writes)
+ if (!fc->big_writes || offset != 0)
break;
- } while (iov_iter_count(ii) && count < fc->max_write &&
- ap->num_pages < max_pages && offset == 0);
+ }
return count > 0 ? count : err;
}
@@ -1300,8 +1349,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
fc->max_pages);
- ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs);
- if (!ap->pages) {
+ ap->folios = fuse_folios_alloc(nr_pages, GFP_KERNEL, &ap->descs);
+ if (!ap->folios) {
err = -ENOMEM;
break;
}
@@ -1323,7 +1372,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
err = -EIO;
}
}
- kfree(ap->pages);
+ kfree(ap->folios);
} while (!err && iov_iter_count(ii));
fuse_write_update_attr(inode, pos, res);
@@ -1335,14 +1384,100 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
return res;
}
+static bool fuse_io_past_eof(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
+}
+
+/*
+ * @return true if an exclusive lock for direct IO writes is needed
+ */
+static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct fuse_file *ff = file->private_data;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ /* Server side has to advise that it supports parallel dio writes. */
+ if (!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES))
+ return true;
+
+ /*
+ * Append will need to know the eventual EOF - always needs an
+ * exclusive lock.
+ */
+ if (iocb->ki_flags & IOCB_APPEND)
+ return true;
+
+ /* shared locks are not allowed with parallel page cache IO */
+ if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state))
+ return true;
+
+ /* Parallel dio beyond EOF is not supported, at least for now. */
+ if (fuse_io_past_eof(iocb, from))
+ return true;
+
+ return false;
+}
+
+static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
+ bool *exclusive)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ *exclusive = fuse_dio_wr_exclusive_lock(iocb, from);
+ if (*exclusive) {
+ inode_lock(inode);
+ } else {
+ inode_lock_shared(inode);
+ /*
+ * New parallal dio allowed only if inode is not in caching
+ * mode and denies new opens in caching mode. This check
+ * should be performed only after taking shared inode lock.
+ * Previous past eof check was without inode lock and might
+ * have raced, so check it again.
+ */
+ if (fuse_io_past_eof(iocb, from) ||
+ fuse_inode_uncached_io_start(fi, NULL) != 0) {
+ inode_unlock_shared(inode);
+ inode_lock(inode);
+ *exclusive = true;
+ }
+ }
+}
+
+static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ if (exclusive) {
+ inode_unlock(inode);
+ } else {
+ /* Allow opens in caching mode after last parallel dio end */
+ fuse_inode_uncached_io_end(fi);
+ inode_unlock_shared(inode);
+ }
+}
+
+static const struct iomap_write_ops fuse_iomap_write_ops = {
+ .read_folio_range = fuse_iomap_read_folio_range,
+};
+
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
struct address_space *mapping = file->f_mapping;
ssize_t written = 0;
struct inode *inode = mapping->host;
- ssize_t err;
+ ssize_t err, count;
struct fuse_conn *fc = get_fuse_conn(inode);
+ bool writeback = false;
if (fc->writeback_cache) {
/* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1351,27 +1486,20 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err)
return err;
- if (fc->handle_killpriv_v2 &&
- setattr_should_drop_suidgid(&nop_mnt_idmap,
- file_inode(file))) {
- goto writethrough;
- }
-
- return generic_file_write_iter(iocb, from);
+ if (!fc->handle_killpriv_v2 ||
+ !setattr_should_drop_suidgid(idmap, file_inode(file)))
+ writeback = true;
}
-writethrough:
inode_lock(inode);
- err = generic_write_checks(iocb, from);
+ err = count = generic_write_checks(iocb, from);
if (err <= 0)
goto out;
- err = file_remove_privs(file);
- if (err)
- goto out;
+ task_io_account_write(count);
- err = file_update_time(file);
+ err = kiocb_modified(iocb);
if (err)
goto out;
@@ -1381,6 +1509,15 @@ writethrough:
goto out;
written = direct_write_fallback(iocb, from, written,
fuse_perform_write(iocb, from));
+ } else if (writeback) {
+ /*
+ * Use iomap so that we can do granular uptodate reads
+ * and granular dirty tracking for large folios.
+ */
+ written = iomap_file_buffered_write(iocb, from,
+ &fuse_iomap_ops,
+ &fuse_iomap_write_ops,
+ file);
} else {
written = fuse_perform_write(iocb, from);
}
@@ -1405,55 +1542,97 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
size_t *nbytesp, int write,
- unsigned int max_pages)
+ unsigned int max_pages,
+ bool use_pages_for_kvec_io)
{
+ bool flush_or_invalidate = false;
+ unsigned int nr_pages = 0;
size_t nbytes = 0; /* # bytes already packed in req */
ssize_t ret = 0;
- /* Special case for kernel I/O: can copy directly into the buffer */
+ /* Special case for kernel I/O: can copy directly into the buffer.
+ * However if the implementation of fuse_conn requires pages instead of
+ * pointer (e.g., virtio-fs), use iov_iter_extract_pages() instead.
+ */
if (iov_iter_is_kvec(ii)) {
- unsigned long user_addr = fuse_get_user_addr(ii);
- size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
+ void *user_addr = (void *)fuse_get_user_addr(ii);
- if (write)
- ap->args.in_args[1].value = (void *) user_addr;
- else
- ap->args.out_args[0].value = (void *) user_addr;
+ if (!use_pages_for_kvec_io) {
+ size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
- iov_iter_advance(ii, frag_size);
- *nbytesp = frag_size;
- return 0;
+ if (write)
+ ap->args.in_args[1].value = user_addr;
+ else
+ ap->args.out_args[0].value = user_addr;
+
+ iov_iter_advance(ii, frag_size);
+ *nbytesp = frag_size;
+ return 0;
+ }
+
+ if (is_vmalloc_addr(user_addr)) {
+ ap->args.vmap_base = user_addr;
+ flush_or_invalidate = true;
+ }
+ }
+
+ /*
+ * Until there is support for iov_iter_extract_folios(), we have to
+ * manually extract pages using iov_iter_extract_pages() and then
+ * copy that to a folios array.
+ */
+ struct page **pages = kzalloc(max_pages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!pages) {
+ ret = -ENOMEM;
+ goto out;
}
- while (nbytes < *nbytesp && ap->num_pages < max_pages) {
- unsigned npages;
+ while (nbytes < *nbytesp && nr_pages < max_pages) {
+ unsigned nfolios, i;
size_t start;
- ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages],
- *nbytesp - nbytes,
- max_pages - ap->num_pages,
- &start);
+
+ ret = iov_iter_extract_pages(ii, &pages,
+ *nbytesp - nbytes,
+ max_pages - nr_pages,
+ 0, &start);
if (ret < 0)
break;
nbytes += ret;
- ret += start;
- npages = DIV_ROUND_UP(ret, PAGE_SIZE);
+ nfolios = DIV_ROUND_UP(ret + start, PAGE_SIZE);
+
+ for (i = 0; i < nfolios; i++) {
+ struct folio *folio = page_folio(pages[i]);
+ unsigned int offset = start +
+ (folio_page_idx(folio, pages[i]) << PAGE_SHIFT);
+ unsigned int len = min_t(unsigned int, ret, PAGE_SIZE - start);
- ap->descs[ap->num_pages].offset = start;
- fuse_page_descs_length_init(ap->descs, ap->num_pages, npages);
+ ap->descs[ap->num_folios].offset = offset;
+ ap->descs[ap->num_folios].length = len;
+ ap->folios[ap->num_folios] = folio;
+ start = 0;
+ ret -= len;
+ ap->num_folios++;
+ }
- ap->num_pages += npages;
- ap->descs[ap->num_pages - 1].length -=
- (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
+ nr_pages += nfolios;
}
+ kfree(pages);
+
+ if (write && flush_or_invalidate)
+ flush_kernel_vmap_range(ap->args.vmap_base, nbytes);
+ ap->args.invalidate_vmap = !write && flush_or_invalidate;
+ ap->args.is_pinned = iov_iter_extract_will_pin(ii);
ap->args.user_pages = true;
if (write)
ap->args.in_pages = true;
else
ap->args.out_pages = true;
+out:
*nbytesp = nbytes;
return ret < 0 ? ret : 0;
@@ -1465,7 +1644,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
int write = flags & FUSE_DIO_WRITE;
int cuse = flags & FUSE_DIO_CUSE;
struct file *file = io->iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fm->fc;
size_t nmax = write ? fc->max_write : fc->max_read;
@@ -1477,13 +1657,21 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
int err = 0;
struct fuse_io_args *ia;
unsigned int max_pages;
+ bool fopen_direct_io = ff->open_flags & FOPEN_DIRECT_IO;
max_pages = iov_iter_npages(iter, fc->max_pages);
ia = fuse_io_alloc(io, max_pages);
if (!ia)
return -ENOMEM;
- if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
+ if (fopen_direct_io) {
+ res = filemap_write_and_wait_range(mapping, pos, pos + count - 1);
+ if (res) {
+ fuse_io_free(ia);
+ return res;
+ }
+ }
+ if (!cuse && filemap_range_has_writeback(mapping, pos, (pos + count - 1))) {
if (!write)
inode_lock(inode);
fuse_sync_writes(inode);
@@ -1491,6 +1679,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
inode_unlock(inode);
}
+ if (fopen_direct_io && write) {
+ res = invalidate_inode_pages2_range(mapping, idx_from, idx_to);
+ if (res) {
+ fuse_io_free(ia);
+ return res;
+ }
+ }
+
io->should_dirty = !write && user_backed_iter(iter);
while (count) {
ssize_t nres;
@@ -1498,7 +1694,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
size_t nbytes = min(count, nmax);
err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
- max_pages);
+ max_pages, fc->use_pages_for_kvec_io);
if (err && !nbytes)
break;
@@ -1512,7 +1708,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
}
if (!io->async || nres < 0) {
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ fuse_release_user_pages(&ia->ap, nres, io->should_dirty);
fuse_io_free(ia);
}
ia = NULL;
@@ -1542,6 +1738,15 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (res > 0)
*ppos = pos;
+ if (res > 0 && write && fopen_direct_io) {
+ /*
+ * As in generic_file_direct_write(), invalidate after the
+ * write, to invalidate read-ahead cache that may have competed
+ * with the write.
+ */
+ invalidate_inode_pages2_range(mapping, idx_from, idx_to);
+ }
+
return res > 0 ? res : err;
}
EXPORT_SYMBOL_GPL(fuse_direct_io);
@@ -1566,7 +1771,7 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
ssize_t res;
- if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
+ if (!is_sync_kiocb(iocb)) {
res = fuse_direct_IO(iocb, to);
} else {
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
@@ -1577,61 +1782,27 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
return res;
}
-static bool fuse_direct_write_extending_i_size(struct kiocb *iocb,
- struct iov_iter *iter)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
-
- return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
-}
-
static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct file *file = iocb->ki_filp;
- struct fuse_file *ff = file->private_data;
- struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
ssize_t res;
- bool exclusive_lock =
- !(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) ||
- iocb->ki_flags & IOCB_APPEND ||
- fuse_direct_write_extending_i_size(iocb, from);
-
- /*
- * Take exclusive lock if
- * - Parallel direct writes are disabled - a user space decision
- * - Parallel direct writes are enabled and i_size is being extended.
- * This might not be needed at all, but needs further investigation.
- */
- if (exclusive_lock)
- inode_lock(inode);
- else {
- inode_lock_shared(inode);
-
- /* A race with truncate might have come up as the decision for
- * the lock type was done without holding the lock, check again.
- */
- if (fuse_direct_write_extending_i_size(iocb, from)) {
- inode_unlock_shared(inode);
- inode_lock(inode);
- exclusive_lock = true;
- }
- }
+ bool exclusive;
+ fuse_dio_lock(iocb, from, &exclusive);
res = generic_write_checks(iocb, from);
if (res > 0) {
- if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
+ task_io_account_write(res);
+ if (!is_sync_kiocb(iocb)) {
res = fuse_direct_IO(iocb, from);
} else {
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
+
res = fuse_direct_io(&io, from, &iocb->ki_pos,
FUSE_DIO_WRITE);
fuse_write_update_attr(inode, iocb->ki_pos, res);
}
}
- if (exclusive_lock)
- inode_unlock(inode);
- else
- inode_unlock_shared(inode);
+ fuse_dio_unlock(iocb, exclusive);
return res;
}
@@ -1648,10 +1819,13 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (FUSE_IS_DAX(inode))
return fuse_dax_read_iter(iocb, to);
- if (!(ff->open_flags & FOPEN_DIRECT_IO))
- return fuse_cache_read_iter(iocb, to);
- else
+ /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+ if (ff->open_flags & FOPEN_DIRECT_IO)
return fuse_direct_read_iter(iocb, to);
+ else if (fuse_file_passthrough(ff))
+ return fuse_passthrough_read_iter(iocb, to);
+ else
+ return fuse_cache_read_iter(iocb, to);
}
static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -1666,44 +1840,69 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (FUSE_IS_DAX(inode))
return fuse_dax_write_iter(iocb, from);
- if (!(ff->open_flags & FOPEN_DIRECT_IO))
+ /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+ if (ff->open_flags & FOPEN_DIRECT_IO)
+ return fuse_direct_write_iter(iocb, from);
+ else if (fuse_file_passthrough(ff))
+ return fuse_passthrough_write_iter(iocb, from);
+ else
return fuse_cache_write_iter(iocb, from);
+}
+
+static ssize_t fuse_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct fuse_file *ff = in->private_data;
+
+ /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+ if (fuse_file_passthrough(ff) && !(ff->open_flags & FOPEN_DIRECT_IO))
+ return fuse_passthrough_splice_read(in, ppos, pipe, len, flags);
else
- return fuse_direct_write_iter(iocb, from);
+ return filemap_splice_read(in, ppos, pipe, len, flags);
+}
+
+static ssize_t fuse_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ struct fuse_file *ff = out->private_data;
+
+ /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+ if (fuse_file_passthrough(ff) && !(ff->open_flags & FOPEN_DIRECT_IO))
+ return fuse_passthrough_splice_write(pipe, out, ppos, len, flags);
+ else
+ return iter_file_splice_write(pipe, out, ppos, len, flags);
}
static void fuse_writepage_free(struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
- int i;
if (wpa->bucket)
fuse_sync_bucket_dec(wpa->bucket);
- for (i = 0; i < ap->num_pages; i++)
- __free_page(ap->pages[i]);
+ fuse_file_put(wpa->ia.ff, false);
- if (wpa->ia.ff)
- fuse_file_put(wpa->ia.ff, false, false);
-
- kfree(ap->pages);
+ kfree(ap->folios);
kfree(wpa);
}
-static void fuse_writepage_finish(struct fuse_mount *fm,
- struct fuse_writepage_args *wpa)
+static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
- struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
- for (i = 0; i < ap->num_pages; i++) {
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
- wb_writeout_inc(&bdi->wb);
- }
+ for (i = 0; i < ap->num_folios; i++)
+ /*
+ * Benchmarks showed that ending writeback within the
+ * scope of the fi->lock alleviates xarray lock
+ * contention and noticeably improves performance.
+ */
+ iomap_finish_folio_write(inode, ap->folios[i],
+ ap->descs[i].length);
+
wake_up(&fi->page_waitq);
}
@@ -1713,12 +1912,15 @@ static void fuse_send_writepage(struct fuse_mount *fm,
__releases(fi->lock)
__acquires(fi->lock)
{
- struct fuse_writepage_args *aux, *next;
struct fuse_inode *fi = get_fuse_inode(wpa->inode);
+ struct fuse_args_pages *ap = &wpa->ia.ap;
struct fuse_write_in *inarg = &wpa->ia.write.in;
- struct fuse_args *args = &wpa->ia.ap.args;
- __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE;
- int err;
+ struct fuse_args *args = &ap->args;
+ __u64 data_size = 0;
+ int err, i;
+
+ for (i = 0; i < ap->num_folios; i++)
+ data_size += ap->descs[i].length;
fi->writectr++;
if (inarg->offset + data_size <= size) {
@@ -1749,17 +1951,8 @@ __acquires(fi->lock)
out_free:
fi->writectr--;
- rb_erase(&wpa->writepages_entry, &fi->writepages);
- fuse_writepage_finish(fm, wpa);
+ fuse_writepage_finish(wpa);
spin_unlock(&fi->lock);
-
- /* After fuse_writepage_finish() aux request list is private */
- for (aux = wpa->next; aux; aux = next) {
- next = aux->next;
- aux->next = NULL;
- fuse_writepage_free(aux);
- }
-
fuse_writepage_free(wpa);
spin_lock(&fi->lock);
}
@@ -1787,43 +1980,6 @@ __acquires(fi->lock)
}
}
-static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
- struct fuse_writepage_args *wpa)
-{
- pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
- pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
-
- WARN_ON(!wpa->ia.ap.num_pages);
- while (*p) {
- struct fuse_writepage_args *curr;
- pgoff_t curr_index;
-
- parent = *p;
- curr = rb_entry(parent, struct fuse_writepage_args,
- writepages_entry);
- WARN_ON(curr->inode != wpa->inode);
- curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
-
- if (idx_from >= curr_index + curr->ia.ap.num_pages)
- p = &(*p)->rb_right;
- else if (idx_to < curr_index)
- p = &(*p)->rb_left;
- else
- return curr;
- }
-
- rb_link_node(&wpa->writepages_entry, parent, p);
- rb_insert_color(&wpa->writepages_entry, root);
- return NULL;
-}
-
-static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
-{
- WARN_ON(fuse_insert_writeback(root, wpa));
-}
-
static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
int error)
{
@@ -1843,44 +1999,8 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
if (!fc->writeback_cache)
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODIFY);
spin_lock(&fi->lock);
- rb_erase(&wpa->writepages_entry, &fi->writepages);
- while (wpa->next) {
- struct fuse_mount *fm = get_fuse_mount(inode);
- struct fuse_write_in *inarg = &wpa->ia.write.in;
- struct fuse_writepage_args *next = wpa->next;
-
- wpa->next = next->next;
- next->next = NULL;
- next->ia.ff = fuse_file_get(wpa->ia.ff);
- tree_insert(&fi->writepages, next);
-
- /*
- * Skip fuse_flush_writepages() to make it easy to crop requests
- * based on primary request size.
- *
- * 1st case (trivial): there are no concurrent activities using
- * fuse_set/release_nowrite. Then we're on safe side because
- * fuse_flush_writepages() would call fuse_send_writepage()
- * anyway.
- *
- * 2nd case: someone called fuse_set_nowrite and it is waiting
- * now for completion of all in-flight requests. This happens
- * rarely and no more than once per page, so this should be
- * okay.
- *
- * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
- * of fuse_set_nowrite..fuse_release_nowrite section. The fact
- * that fuse_set_nowrite returned implies that all in-flight
- * requests were completed along with all of their secondary
- * requests. Further primary requests are blocked by negative
- * writectr. Hence there cannot be any in-flight requests and
- * no invocations of fuse_writepage_end() while we're in
- * fuse_set_nowrite..fuse_release_nowrite section.
- */
- fuse_send_writepage(fm, next, inarg->offset + inarg->size);
- }
fi->writectr--;
- fuse_writepage_finish(fm, wpa);
+ fuse_writepage_finish(wpa);
spin_unlock(&fi->lock);
fuse_writepage_free(wpa);
}
@@ -1912,21 +2032,10 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
struct fuse_file *ff;
int err;
- /*
- * Inode is always written before the last reference is dropped and
- * hence this should not be reached from reclaim.
- *
- * Writing back the inode from reclaim can deadlock if the request
- * processing itself needs an allocation. Allocations triggering
- * reclaim while serving a request can't be prevented, because it can
- * involve any number of unrelated userspace processes.
- */
- WARN_ON(wbc->for_reclaim);
-
ff = __fuse_write_file_get(fi);
err = fuse_flush_times(inode, ff);
if (ff)
- fuse_file_put(ff, false, false);
+ fuse_file_put(ff, false);
return err;
}
@@ -1939,9 +2048,9 @@ static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
wpa = kzalloc(sizeof(*wpa), GFP_NOFS);
if (wpa) {
ap = &wpa->ia.ap;
- ap->num_pages = 0;
- ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs);
- if (!ap->pages) {
+ ap->num_folios = 0;
+ ap->folios = fuse_folios_alloc(1, GFP_NOFS, &ap->descs);
+ if (!ap->folios) {
kfree(wpa);
wpa = NULL;
}
@@ -1964,463 +2073,244 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
rcu_read_unlock();
}
-static int fuse_writepage_locked(struct page *page)
+static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
+ uint32_t folio_index, loff_t offset, unsigned len)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ struct fuse_args_pages *ap = &wpa->ia.ap;
+
+ ap->folios[folio_index] = folio;
+ ap->descs[folio_index].offset = offset;
+ ap->descs[folio_index].length = len;
+}
+
+static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
+ size_t offset,
+ struct fuse_file *ff)
+{
+ struct inode *inode = folio->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_writepage_args *wpa;
struct fuse_args_pages *ap;
- struct page *tmp_page;
- int error = -ENOMEM;
-
- set_page_writeback(page);
wpa = fuse_writepage_args_alloc();
if (!wpa)
- goto err;
- ap = &wpa->ia.ap;
-
- tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (!tmp_page)
- goto err_free;
-
- error = -EIO;
- wpa->ia.ff = fuse_write_file_get(fi);
- if (!wpa->ia.ff)
- goto err_nofile;
+ return NULL;
fuse_writepage_add_to_bucket(fc, wpa);
- fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
-
- copy_highpage(tmp_page, page);
+ fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio) + offset, 0);
wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
- wpa->next = NULL;
- ap->args.in_pages = true;
- ap->num_pages = 1;
- ap->pages[0] = tmp_page;
- ap->descs[0].offset = 0;
- ap->descs[0].length = PAGE_SIZE;
- ap->args.end = fuse_writepage_end;
wpa->inode = inode;
+ wpa->ia.ff = ff;
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
-
- spin_lock(&fi->lock);
- tree_insert(&fi->writepages, wpa);
- list_add_tail(&wpa->queue_entry, &fi->queued_writes);
- fuse_flush_writepages(inode);
- spin_unlock(&fi->lock);
-
- end_page_writeback(page);
-
- return 0;
-
-err_nofile:
- __free_page(tmp_page);
-err_free:
- kfree(wpa);
-err:
- mapping_set_error(page->mapping, error);
- end_page_writeback(page);
- return error;
-}
-
-static int fuse_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct fuse_conn *fc = get_fuse_conn(page->mapping->host);
- int err;
-
- if (fuse_page_is_writeback(page->mapping->host, page->index)) {
- /*
- * ->writepages() should be called for sync() and friends. We
- * should only get here on direct reclaim and then we are
- * allowed to skip a page which is already in flight
- */
- WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
-
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
-
- if (wbc->sync_mode == WB_SYNC_NONE &&
- fc->num_background >= fc->congestion_threshold)
- return AOP_WRITEPAGE_ACTIVATE;
-
- err = fuse_writepage_locked(page);
- unlock_page(page);
+ ap = &wpa->ia.ap;
+ ap->args.in_pages = true;
+ ap->args.end = fuse_writepage_end;
- return err;
+ return wpa;
}
struct fuse_fill_wb_data {
struct fuse_writepage_args *wpa;
struct fuse_file *ff;
- struct inode *inode;
- struct page **orig_pages;
- unsigned int max_pages;
+ unsigned int max_folios;
+ /*
+ * nr_bytes won't overflow since fuse_folios_need_send() caps
+ * wb requests to never exceed fc->max_pages (which has an upper bound
+ * of U16_MAX).
+ */
+ unsigned int nr_bytes;
};
-static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
+static bool fuse_pages_realloc(struct fuse_fill_wb_data *data,
+ unsigned int max_pages)
{
struct fuse_args_pages *ap = &data->wpa->ia.ap;
- struct fuse_conn *fc = get_fuse_conn(data->inode);
- struct page **pages;
- struct fuse_page_desc *descs;
- unsigned int npages = min_t(unsigned int,
- max_t(unsigned int, data->max_pages * 2,
- FUSE_DEFAULT_MAX_PAGES_PER_REQ),
- fc->max_pages);
- WARN_ON(npages <= data->max_pages);
-
- pages = fuse_pages_alloc(npages, GFP_NOFS, &descs);
- if (!pages)
+ struct folio **folios;
+ struct fuse_folio_desc *descs;
+ unsigned int nfolios = min_t(unsigned int,
+ max_t(unsigned int, data->max_folios * 2,
+ FUSE_DEFAULT_MAX_PAGES_PER_REQ),
+ max_pages);
+ WARN_ON(nfolios <= data->max_folios);
+
+ folios = fuse_folios_alloc(nfolios, GFP_NOFS, &descs);
+ if (!folios)
return false;
- memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages);
- memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages);
- kfree(ap->pages);
- ap->pages = pages;
+ memcpy(folios, ap->folios, sizeof(struct folio *) * ap->num_folios);
+ memcpy(descs, ap->descs, sizeof(struct fuse_folio_desc) * ap->num_folios);
+ kfree(ap->folios);
+ ap->folios = folios;
ap->descs = descs;
- data->max_pages = npages;
+ data->max_folios = nfolios;
return true;
}
-static void fuse_writepages_send(struct fuse_fill_wb_data *data)
+static void fuse_writepages_send(struct inode *inode,
+ struct fuse_fill_wb_data *data)
{
struct fuse_writepage_args *wpa = data->wpa;
- struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
- int num_pages = wpa->ia.ap.num_pages;
- int i;
- wpa->ia.ff = fuse_file_get(data->ff);
spin_lock(&fi->lock);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
-
- for (i = 0; i < num_pages; i++)
- end_page_writeback(data->orig_pages[i]);
}
-/*
- * Check under fi->lock if the page is under writeback, and insert it onto the
- * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's
- * one already added for a page at this offset. If there's none, then insert
- * this new request onto the auxiliary list, otherwise reuse the existing one by
- * swapping the new temp page with the old one.
- */
-static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
- struct page *page)
+static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
+ unsigned len, struct fuse_args_pages *ap,
+ unsigned cur_bytes, bool write)
{
- struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
- struct fuse_writepage_args *tmp;
- struct fuse_writepage_args *old_wpa;
- struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
-
- WARN_ON(new_ap->num_pages != 0);
- new_ap->num_pages = 1;
-
- spin_lock(&fi->lock);
- old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
- if (!old_wpa) {
- spin_unlock(&fi->lock);
- return true;
- }
-
- for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
- pgoff_t curr_index;
-
- WARN_ON(tmp->inode != new_wpa->inode);
- curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
- if (curr_index == page->index) {
- WARN_ON(tmp->ia.ap.num_pages != 1);
- swap(tmp->ia.ap.pages[0], new_ap->pages[0]);
- break;
- }
- }
-
- if (!tmp) {
- new_wpa->next = old_wpa->next;
- old_wpa->next = new_wpa;
- }
+ struct folio *prev_folio;
+ struct fuse_folio_desc prev_desc;
+ unsigned bytes = cur_bytes + len;
+ loff_t prev_pos;
+ size_t max_bytes = write ? fc->max_write : fc->max_read;
- spin_unlock(&fi->lock);
-
- if (tmp) {
- struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
-
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
- wb_writeout_inc(&bdi->wb);
- fuse_writepage_free(new_wpa);
- }
-
- return false;
-}
-
-static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
- struct fuse_args_pages *ap,
- struct fuse_fill_wb_data *data)
-{
- WARN_ON(!ap->num_pages);
-
- /*
- * Being under writeback is unlikely but possible. For example direct
- * read to an mmaped fuse file will set the page dirty twice; once when
- * the pages are faulted with get_user_pages(), and then after the read
- * completed.
- */
- if (fuse_page_is_writeback(data->inode, page->index))
- return true;
+ WARN_ON(!ap->num_folios);
/* Reached max pages */
- if (ap->num_pages == fc->max_pages)
+ if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
return true;
- /* Reached max write bytes */
- if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
+ if (bytes > max_bytes)
return true;
/* Discontinuity */
- if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
- return true;
-
- /* Need to grow the pages array? If so, did the expansion fail? */
- if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
+ prev_folio = ap->folios[ap->num_folios - 1];
+ prev_desc = ap->descs[ap->num_folios - 1];
+ prev_pos = folio_pos(prev_folio) + prev_desc.offset + prev_desc.length;
+ if (prev_pos != pos)
return true;
return false;
}
-static int fuse_writepages_fill(struct folio *folio,
- struct writeback_control *wbc, void *_data)
+static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
+ struct folio *folio, u64 pos,
+ unsigned len, u64 end_pos)
{
- struct fuse_fill_wb_data *data = _data;
+ struct fuse_fill_wb_data *data = wpc->wb_ctx;
struct fuse_writepage_args *wpa = data->wpa;
struct fuse_args_pages *ap = &wpa->ia.ap;
- struct inode *inode = data->inode;
+ struct inode *inode = wpc->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
- struct page *tmp_page;
- int err;
+ loff_t offset = offset_in_folio(folio, pos);
+
+ WARN_ON_ONCE(!data);
if (!data->ff) {
- err = -EIO;
data->ff = fuse_write_file_get(fi);
if (!data->ff)
- goto out_unlock;
- }
-
- if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) {
- fuse_writepages_send(data);
- data->wpa = NULL;
+ return -EIO;
}
- err = -ENOMEM;
- tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (!tmp_page)
- goto out_unlock;
+ if (wpa) {
+ bool send = fuse_folios_need_send(fc, pos, len, ap,
+ data->nr_bytes, true);
- /*
- * The page must not be redirtied until the writeout is completed
- * (i.e. userspace has sent a reply to the write request). Otherwise
- * there could be more than one temporary page instance for each real
- * page.
- *
- * This is ensured by holding the page lock in page_mkwrite() while
- * checking fuse_page_is_writeback(). We already hold the page lock
- * since clear_page_dirty_for_io() and keep it held until we add the
- * request to the fi->writepages list and increment ap->num_pages.
- * After this fuse_page_is_writeback() will indicate that the page is
- * under writeback, so we can release the page lock.
- */
- if (data->wpa == NULL) {
- err = -ENOMEM;
- wpa = fuse_writepage_args_alloc();
- if (!wpa) {
- __free_page(tmp_page);
- goto out_unlock;
+ if (!send) {
+ /*
+ * Need to grow the pages array? If so, did the
+ * expansion fail?
+ */
+ send = (ap->num_folios == data->max_folios) &&
+ !fuse_pages_realloc(data, fc->max_pages);
}
- fuse_writepage_add_to_bucket(fc, wpa);
- data->max_pages = 1;
+ if (send) {
+ fuse_writepages_send(inode, data);
+ data->wpa = NULL;
+ data->nr_bytes = 0;
+ }
+ }
+ if (data->wpa == NULL) {
+ wpa = fuse_writepage_args_setup(folio, offset, data->ff);
+ if (!wpa)
+ return -ENOMEM;
+ fuse_file_get(wpa->ia.ff);
+ data->max_folios = 1;
ap = &wpa->ia.ap;
- fuse_write_args_fill(&wpa->ia, data->ff, folio_pos(folio), 0);
- wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
- wpa->next = NULL;
- ap->args.in_pages = true;
- ap->args.end = fuse_writepage_end;
- ap->num_pages = 0;
- wpa->inode = inode;
}
- folio_start_writeback(folio);
- copy_highpage(tmp_page, &folio->page);
- ap->pages[ap->num_pages] = tmp_page;
- ap->descs[ap->num_pages].offset = 0;
- ap->descs[ap->num_pages].length = PAGE_SIZE;
- data->orig_pages[ap->num_pages] = &folio->page;
+ fuse_writepage_args_page_fill(wpa, folio, ap->num_folios,
+ offset, len);
+ data->nr_bytes += len;
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ ap->num_folios++;
+ if (!data->wpa)
+ data->wpa = wpa;
+
+ return len;
+}
+
+static int fuse_iomap_writeback_submit(struct iomap_writepage_ctx *wpc,
+ int error)
+{
+ struct fuse_fill_wb_data *data = wpc->wb_ctx;
+
+ WARN_ON_ONCE(!data);
- err = 0;
if (data->wpa) {
- /*
- * Protected by fi->lock against concurrent access by
- * fuse_page_is_writeback().
- */
- spin_lock(&fi->lock);
- ap->num_pages++;
- spin_unlock(&fi->lock);
- } else if (fuse_writepage_add(wpa, &folio->page)) {
- data->wpa = wpa;
- } else {
- folio_end_writeback(folio);
+ WARN_ON(!data->wpa->ia.ap.num_folios);
+ fuse_writepages_send(wpc->inode, data);
}
-out_unlock:
- folio_unlock(folio);
- return err;
+ if (data->ff)
+ fuse_file_put(data->ff, false);
+
+ return error;
}
+static const struct iomap_writeback_ops fuse_writeback_ops = {
+ .writeback_range = fuse_iomap_writeback_range,
+ .writeback_submit = fuse_iomap_writeback_submit,
+};
+
static int fuse_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_fill_wb_data data;
- int err;
+ struct fuse_fill_wb_data data = {};
+ struct iomap_writepage_ctx wpc = {
+ .inode = inode,
+ .iomap.type = IOMAP_MAPPED,
+ .wbc = wbc,
+ .ops = &fuse_writeback_ops,
+ .wb_ctx = &data,
+ };
- err = -EIO;
if (fuse_is_bad(inode))
- goto out;
+ return -EIO;
if (wbc->sync_mode == WB_SYNC_NONE &&
fc->num_background >= fc->congestion_threshold)
return 0;
- data.inode = inode;
- data.wpa = NULL;
- data.ff = NULL;
-
- err = -ENOMEM;
- data.orig_pages = kcalloc(fc->max_pages,
- sizeof(struct page *),
- GFP_NOFS);
- if (!data.orig_pages)
- goto out;
-
- err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
- if (data.wpa) {
- WARN_ON(!data.wpa->ia.ap.num_pages);
- fuse_writepages_send(&data);
- }
- if (data.ff)
- fuse_file_put(data.ff, false, false);
-
- kfree(data.orig_pages);
-out:
- return err;
-}
-
-/*
- * It's worthy to make sure that space is reserved on disk for the write,
- * but how to implement it without killing performance need more thinking.
- */
-static int fuse_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
-{
- pgoff_t index = pos >> PAGE_SHIFT;
- struct fuse_conn *fc = get_fuse_conn(file_inode(file));
- struct page *page;
- loff_t fsize;
- int err = -ENOMEM;
-
- WARN_ON(!fc->writeback_cache);
-
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- goto error;
-
- fuse_wait_on_page_writeback(mapping->host, page->index);
-
- if (PageUptodate(page) || len == PAGE_SIZE)
- goto success;
- /*
- * Check if the start this page comes after the end of file, in which
- * case the readpage can be optimized away.
- */
- fsize = i_size_read(mapping->host);
- if (fsize <= (pos & PAGE_MASK)) {
- size_t off = pos & ~PAGE_MASK;
- if (off)
- zero_user_segment(page, 0, off);
- goto success;
- }
- err = fuse_do_readpage(file, page);
- if (err)
- goto cleanup;
-success:
- *pagep = page;
- return 0;
-
-cleanup:
- unlock_page(page);
- put_page(page);
-error:
- return err;
-}
-
-static int fuse_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- struct inode *inode = page->mapping->host;
-
- /* Haven't copied anything? Skip zeroing, size extending, dirtying. */
- if (!copied)
- goto unlock;
-
- pos += copied;
- if (!PageUptodate(page)) {
- /* Zero any unwritten bytes at the end of the page */
- size_t endoff = pos & ~PAGE_MASK;
- if (endoff)
- zero_user_segment(page, endoff, PAGE_SIZE);
- SetPageUptodate(page);
- }
-
- if (pos > inode->i_size)
- i_size_write(inode, pos);
-
- set_page_dirty(page);
-
-unlock:
- unlock_page(page);
- put_page(page);
-
- return copied;
+ return iomap_writepages(&wpc);
}
static int fuse_launder_folio(struct folio *folio)
{
int err = 0;
- if (folio_clear_dirty_for_io(folio)) {
- struct inode *inode = folio->mapping->host;
+ struct fuse_fill_wb_data data = {};
+ struct iomap_writepage_ctx wpc = {
+ .inode = folio->mapping->host,
+ .iomap.type = IOMAP_MAPPED,
+ .ops = &fuse_writeback_ops,
+ .wb_ctx = &data,
+ };
- /* Serialize with pending writeback for the same page */
- fuse_wait_on_page_writeback(inode, folio->index);
- err = fuse_writepage_locked(&folio->page);
+ if (folio_clear_dirty_for_io(folio)) {
+ err = iomap_writeback_folio(&wpc, folio);
+ err = fuse_iomap_writeback_submit(&wpc, err);
if (!err)
- fuse_wait_on_page_writeback(inode, folio->index);
+ folio_wait_writeback(folio);
}
return err;
}
@@ -2454,17 +2344,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
*/
static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
file_update_time(vmf->vma->vm_file);
- lock_page(page);
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
+ folio_lock(folio);
+ if (folio->mapping != inode->i_mapping) {
+ folio_unlock(folio);
return VM_FAULT_NOPAGE;
}
- fuse_wait_on_page_writeback(inode, page->index);
+ folio_wait_writeback(folio);
return VM_FAULT_LOCKED;
}
@@ -2478,19 +2368,53 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fm->fc;
+ struct inode *inode = file_inode(file);
+ int rc;
/* DAX mmap is superior to direct_io mmap */
- if (FUSE_IS_DAX(file_inode(file)))
+ if (FUSE_IS_DAX(inode))
return fuse_dax_mmap(file, vma);
+ /*
+ * If inode is in passthrough io mode, because it has some file open
+ * in passthrough mode, either mmap to backing file or fail mmap,
+ * because mixing cached mmap and passthrough io mode is not allowed.
+ */
+ if (fuse_file_passthrough(ff))
+ return fuse_passthrough_mmap(file, vma);
+ else if (fuse_inode_backing(get_fuse_inode(inode)))
+ return -ENODEV;
+
+ /*
+ * FOPEN_DIRECT_IO handling is special compared to O_DIRECT,
+ * as does not allow MAP_SHARED mmap without FUSE_DIRECT_IO_ALLOW_MMAP.
+ */
if (ff->open_flags & FOPEN_DIRECT_IO) {
- /* Can't provide the coherency needed for MAP_SHARED */
- if (vma->vm_flags & VM_MAYSHARE)
+ /*
+ * Can't provide the coherency needed for MAP_SHARED
+ * if FUSE_DIRECT_IO_ALLOW_MMAP isn't set.
+ */
+ if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_allow_mmap)
return -ENODEV;
invalidate_inode_pages2(file->f_mapping);
- return generic_file_mmap(file, vma);
+ if (!(vma->vm_flags & VM_MAYSHARE)) {
+ /* MAP_PRIVATE */
+ return generic_file_mmap(file, vma);
+ }
+
+ /*
+ * First mmap of direct_io file enters caching inode io mode.
+ * Also waits for parallel dio writers to go into serial mode
+ * (exclusive instead of shared lock).
+ * After first mmap, the inode stays in caching io mode until
+ * the direct_io file release.
+ */
+ rc = fuse_file_cached_io_open(inode, ff);
+ if (rc)
+ return rc;
}
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
@@ -2523,14 +2447,14 @@ static int convert_fuse_file_lock(struct fuse_conn *fc,
* translate it into the caller's pid namespace.
*/
rcu_read_lock();
- fl->fl_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
+ fl->c.flc_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
rcu_read_unlock();
break;
default:
return -EIO;
}
- fl->fl_type = ffl->type;
+ fl->c.flc_type = ffl->type;
return 0;
}
@@ -2544,10 +2468,10 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file,
memset(inarg, 0, sizeof(*inarg));
inarg->fh = ff->fh;
- inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
+ inarg->owner = fuse_lock_owner_id(fc, fl->c.flc_owner);
inarg->lk.start = fl->fl_start;
inarg->lk.end = fl->fl_end;
- inarg->lk.type = fl->fl_type;
+ inarg->lk.type = fl->c.flc_type;
inarg->lk.pid = pid;
if (flock)
inarg->lk_flags |= FUSE_LK_FLOCK;
@@ -2584,8 +2508,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_lk_in inarg;
- int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
- struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
+ int opcode = (fl->c.flc_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
+ struct pid *pid = fl->c.flc_type != F_UNLCK ? task_tgid(current) : NULL;
pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns);
int err;
@@ -2594,10 +2518,6 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
return -ENOLCK;
}
- /* Unlock on close is handled by the flush method */
- if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
- return 0;
-
fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
err = fuse_simple_request(fm, &args);
@@ -2885,7 +2805,7 @@ static void fuse_do_truncate(struct file *file)
attr.ia_file = file;
attr.ia_valid |= ATTR_FILE;
- fuse_do_setattr(file_dentry(file), &attr, file);
+ fuse_do_setattr(file_mnt_idmap(file), file_dentry(file), &attr, file);
}
static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
@@ -3028,7 +2948,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
inode_lock(inode);
if (block_faults) {
filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err)
goto out;
}
@@ -3115,6 +3035,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
.flags = flags
};
struct fuse_write_out outarg;
+ struct fuse_copy_file_range_out outarg_64;
+ u64 bytes_copied;
ssize_t err;
/* mark unstable when write-back is not used, and file_out gets
* extended */
@@ -3164,30 +3086,51 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (is_unstable)
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
- args.opcode = FUSE_COPY_FILE_RANGE;
+ args.opcode = FUSE_COPY_FILE_RANGE_64;
args.nodeid = ff_in->nodeid;
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.out_numargs = 1;
- args.out_args[0].size = sizeof(outarg);
- args.out_args[0].value = &outarg;
+ args.out_args[0].size = sizeof(outarg_64);
+ args.out_args[0].value = &outarg_64;
+ if (fc->no_copy_file_range_64) {
+fallback:
+ /* Fall back to old op that can't handle large copy length */
+ args.opcode = FUSE_COPY_FILE_RANGE;
+ args.out_args[0].size = sizeof(outarg);
+ args.out_args[0].value = &outarg;
+ inarg.len = len = min_t(size_t, len, UINT_MAX & PAGE_MASK);
+ }
err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
- fc->no_copy_file_range = 1;
- err = -EOPNOTSUPP;
+ if (fc->no_copy_file_range_64) {
+ fc->no_copy_file_range = 1;
+ err = -EOPNOTSUPP;
+ } else {
+ fc->no_copy_file_range_64 = 1;
+ goto fallback;
+ }
}
if (err)
goto out;
+ bytes_copied = fc->no_copy_file_range_64 ?
+ outarg.size : outarg_64.bytes_copied;
+
+ if (bytes_copied > len) {
+ err = -EIO;
+ goto out;
+ }
+
truncate_inode_pages_range(inode_out->i_mapping,
ALIGN_DOWN(pos_out, PAGE_SIZE),
- ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
+ ALIGN(pos_out + bytes_copied, PAGE_SIZE) - 1);
file_update_time(file_out);
- fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size);
+ fuse_write_update_attr(inode_out, pos_out + bytes_copied, bytes_copied);
- err = outarg.size;
+ err = bytes_copied;
out:
if (is_unstable)
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
@@ -3210,8 +3153,8 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
len, flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(src_file, src_off, dst_file,
- dst_off, len, flags);
+ ret = splice_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len);
return ret;
}
@@ -3227,8 +3170,8 @@ static const struct file_operations fuse_file_operations = {
.lock = fuse_file_lock,
.get_unmapped_area = thp_get_unmapped_area,
.flock = fuse_file_flock,
- .splice_read = filemap_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_read = fuse_splice_read,
+ .splice_write = fuse_splice_write,
.unlocked_ioctl = fuse_file_ioctl,
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
@@ -3239,28 +3182,33 @@ static const struct file_operations fuse_file_operations = {
static const struct address_space_operations fuse_file_aops = {
.read_folio = fuse_read_folio,
.readahead = fuse_readahead,
- .writepage = fuse_writepage,
.writepages = fuse_writepages,
.launder_folio = fuse_launder_folio,
- .dirty_folio = filemap_dirty_folio,
+ .dirty_folio = iomap_dirty_folio,
+ .release_folio = iomap_release_folio,
+ .invalidate_folio = iomap_invalidate_folio,
+ .is_partially_uptodate = iomap_is_partially_uptodate,
+ .migrate_folio = filemap_migrate_folio,
.bmap = fuse_bmap,
.direct_IO = fuse_direct_IO,
- .write_begin = fuse_write_begin,
- .write_end = fuse_write_end,
};
void fuse_init_file_inode(struct inode *inode, unsigned int flags)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops;
+ if (fc->writeback_cache)
+ mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
fi->writectr = 0;
+ fi->iocachectr = 0;
init_waitqueue_head(&fi->page_waitq);
- fi->writepages = RB_ROOT;
+ init_waitqueue_head(&fi->direct_io_waitq);
if (IS_ENABLED(CONFIG_FUSE_DAX))
fuse_dax_inode_init(inode, flags);